In [1]:
import pandas as pd
import numpy as np
import datetime as dt

In [14]:
def tidy_split(df, column, sep=',', keep=False):
    """
    Explode a delimited text column into one row per delimited piece.

    Rows whose `column` value is missing are dropped first. Every remaining
    value is cast to `str`, split on `sep`, and each piece (whitespace
    stripped) gets its own copy of the originating row.

    Params
    ------
    df : pandas.DataFrame
        frame holding the column to explode
    column : str
        name of the column to split
    sep : str
        delimiter passed to `str.split`
    keep : bool
        when True, a value that splits into more than one piece also
        contributes an extra row carrying the unsplit value, placed before
        its pieces

    Returns
    -------
    pandas.DataFrame
        A new frame with the same columns as `df`; index labels of the
        source rows are repeated once per emitted row.
    """
    present = df.dropna(subset=[column])
    row_positions = []
    pieces = []
    for pos, raw in enumerate(present[column].astype(str)):
        parts = raw.split(sep)
        if keep and len(parts) > 1:
            # Emit the unsplit value ahead of its pieces.
            row_positions.append(pos)
            pieces.append(raw)
        row_positions.extend([pos] * len(parts))
        pieces.extend(part.strip() for part in parts)
    # Positional lookup: `pos` counts rows of the NaN-filtered frame.
    expanded = present.iloc[row_positions, :].copy()
    expanded[column] = pieces
    return expanded

In [15]:
def sale_activity(df, suffix=''):
    """
    Pivot the multi-line 'Previous Sale Activity' text into one column per
    client, holding the latest end date per 'Unique Id'.

    Each line of 'Previous Sale Activity' becomes its own row (via
    `tidy_split`). The last 11 characters of a line are read as the end date
    and everything except the last 25 characters as the client. Lines whose
    end date is the placeholder 'own-Unknown' fall back to the row's
    'Acq. Expires' value.

    Params
    ------
    df : pandas.DataFrame
        frame with 'Previous Sale Activity', 'Acq. Expires' and 'Unique Id'
        columns
    suffix : str
        text appended to each name in the returned column list

    Returns
    -------
    tuple (pandas.DataFrame, list of str)
        `df` left-joined with the per-client end-date columns and sorted by
        'Unique Id', plus the list of client column names with `suffix`
        appended.

    NOTE(review): `suffix` is applied only to the returned names, not to the
    joined columns, so a non-empty suffix yields names that are absent from
    the returned frame — confirm the intended use with callers.
    """
    sale_act = tidy_split(df, 'Previous Sale Activity', sep='\n', keep=False)
    # regex=False: the '.' is a literal dot to strip (e.g. after month
    # abbreviations); as a regex it would match every character.
    sale_act['Previous Sale Activity'] = sale_act['Previous Sale Activity'].str.replace('.', '', regex=False)
    sale_activity_enddates = sale_act['Previous Sale Activity'].str[-11:]
    # Spanish month abbreviations -> month numbers so the dates can be parsed.
    date_dict = {'ene': '01', 'feb': '02', 'mar': '03', 'abr': '04', 'may':'05', 'jun': '06', 'jul': '07', 'ago': '08', 'sep':'09', 'oct': '10', 'nov': '11', 'dic': '12'}
    sale_activity_enddates = sale_activity_enddates.replace(date_dict, regex=True)
    # pd.to_datetime replaces the unit-less astype('datetime64'), which newer
    # pandas rejects. Presumably the dates are day-month-year: dayfirst=True
    # keeps ambiguous values such as '05-01-2020' from being read month-first
    # — TODO confirm against the export.
    sale_activity_enddates = pd.to_datetime(
        sale_activity_enddates.replace('own-Unknown', np.nan), dayfirst=True
    )
    # Assign the filled result instead of fillna(inplace=True) on a column
    # selection, which is chained assignment and may not reach `sale_act`.
    sale_act['end_dates'] = sale_activity_enddates.fillna(sale_act['Acq. Expires'])
    sale_act['client'] = sale_act['Previous Sale Activity'].str[:-25]
    # 'max' (string) selects pandas' own aggregation; passing the builtin is deprecated.
    sale_act = sale_act.pivot_table(index='Unique Id', values='end_dates', columns='client', aggfunc='max')
    return df.join(sale_act, on='Unique Id', how='left').sort_values(by='Unique Id'), [str(col) + suffix for col in sale_act.columns]

In [16]:
# Load the Free TV availability export; skiprows=1 skips the first row of the sheet.
# NOTE(review): absolute, user-specific path — this only resolves on the machine
# that holds this download; consider a configurable data directory.
ftv_avails = pd.read_excel('C:/Users/aleja/Downloads/Availability by Territory with Reissues - Free TV (22).xlsx', skiprows=1)

In [17]:
def _load_lookup(path, extra_drop):
    """
    Read a lookup workbook keyed by 'Unique Identifier'.

    Rows without a 'Unique Identifier' are discarded, the identifier is
    exposed as an integer 'Unique Id' column, and 'Unique Identifier',
    'Title' and the columns named in `extra_drop` are removed.
    """
    lookup = pd.read_excel(path).dropna(axis=0, subset=['Unique Identifier'])
    # assign() returns a new frame, so no in-place mutation of a filtered slice.
    lookup = lookup.assign(**{'Unique Id': lookup['Unique Identifier'].astype(int)})
    return lookup.drop(['Unique Identifier', 'Title'] + list(extra_drop), axis=1)


# Raw strings: the Windows paths contain backslash sequences (\L, \A, \F, \P, \R)
# that are invalid escapes in a normal string literal; the path values are unchanged.
screeners = _load_lookup(r'Z:\LEDAFILMS\Alteryx\Filmtracks\Project Data ID.xlsx', ['Web Site'])

ratings = _load_lookup(r'Z:\LEDAFILMS\Alteryx\Filmtracks\Ratings & Titles.xls', ['Imdb'])

In [33]:
# NOTE: `df` is the same object as `ftv_avails`, so the edits below also
# show up on `ftv_avails`.
df = ftv_avails
df['Year'] = df['Year Completed']
# Blank out holdbacks that are not in the future (on or before now).
mask = df['Holdback'] <= dt.datetime.today()
# Single .loc call on the frame: the chained form df['Holdback'].loc[mask] = ...
# writes through an intermediate Series and triggers SettingWithCopyWarning.
df.loc[mask, 'Holdback'] = pd.NaT

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self._setitem_with_indexer(indexer, value)


In [34]:
# Missing 'Is Reissue?' means first run; 'Yes' means library. Any other value
# has no entry in the mapping and therefore becomes NaN.
df['First Run or Library'] = (
    df['Is Reissue?']
    .fillna('First Run')
    .map({'Yes': 'Library', 'First Run': 'First Run'})
)

In [35]:
# Add one end-date column per client; `sales` lists those column names.
df, sales = sale_activity(df)
# Left-join the screener and ratings lookups on the shared 'Unique Id' key.
df = (
    df.merge(screeners, on='Unique Id', how='left')
      .merge(ratings, on='Unique Id', how='left')
)

In [36]:
# Turn date-like cells into display strings. The statements are order-dependent:
# each one works element by element on the result of the previous one.

# Sale columns: keep the first 10 characters of each value's string form;
# a value whose string form is 'nan' becomes ''.
df[sales] = df[sales].apply(lambda x: x.apply(lambda x: str(x)[:10] if str(x) != 'nan' else ''))
# Columns with 'Available' in the name: wrap plain Python ints in pd.Timestamp, leave other values as they are.
df[[col for col in df.columns if 'Available' in col]] = df[[col for col in df.columns if 'Available' in col]].apply(lambda x: x.apply(lambda x: pd.Timestamp(x) if type(x) == int else x))
# Columns with 'Available' in the name: stringify every value and remove the ' 00:00:00' text.
df[[col for col in df.columns if 'Available' in col]] = df[[col for col in df.columns if 'Available' in col]].apply(lambda x: x.apply(lambda x: str(x).replace(' 00:00:00', '')))
# Columns with 'Holdback' in the name: same stringify-and-remove step.
df[[col for col in df.columns if 'Holdback' in col]] = df[[col for col in df.columns if 'Holdback' in col]].apply(lambda x: x.apply(lambda x: str(x).replace(' 00:00:00', '')))
# Whole frame: any cell whose string form is 'NaT' becomes ''.
df = df.apply(lambda x: x.apply(lambda x: '' if str(x) == 'NaT' else x))

In [42]:
# Columns to show: every column of `df`, in order, minus the ones listed below.
cols2drop = ['AKA 1', 'AKA 2', 'Original Language', 'Previous Sale Activity', 'Unique Id']
cols_ordered = list(df.columns)
for col in cols2drop:
    # Deletes the first occurrence; raises ValueError when the column is absent.
    del cols_ordered[cols_ordered.index(col)]

In [44]:
# Preview the first five rows of the selected columns.
df.loc[:, cols_ordered].head(5)

Unnamed: 0,Territory,Title,Cast Member,Dialogue Language,Director,Genre,Original Format,Project Type,Subtitle Language,Synopsis,...,El Salvador,Guatemala,Honduras,Nicaragua,Panama,Paraguay,Peru,Dominican Republic,Uruguay,Venezuela
0,Dominican Republic,Robot Overlords,"Ben Kingsley, Gillian Anderson, Callan McAuliffe","Portuguese (Cleared), Spanish, English",Jon Wright,"Action, Adventure, Sci-Fi",HD File,Film,"Portuguese, Spanish",Earth has been conquered by Robots from a dist...,...,,,,,,,A,,,B
1,El Salvador,Robot Overlords,"Ben Kingsley, Gillian Anderson, Callan McAuliffe","Portuguese (Cleared), Spanish, English",Jon Wright,"Action, Adventure, Sci-Fi",HD File,Film,"Portuguese, Spanish",Earth has been conquered by Robots from a dist...,...,,,,,,,A,,,B
2,Venezuela,Robot Overlords,"Ben Kingsley, Gillian Anderson, Callan McAuliffe","Portuguese (Cleared), Spanish, English",Jon Wright,"Action, Adventure, Sci-Fi",HD File,Film,"Portuguese, Spanish",Earth has been conquered by Robots from a dist...,...,,,,,,,A,,,B
3,Honduras,Robot Overlords,"Ben Kingsley, Gillian Anderson, Callan McAuliffe","Portuguese (Cleared), Spanish, English",Jon Wright,"Action, Adventure, Sci-Fi",HD File,Film,"Portuguese, Spanish",Earth has been conquered by Robots from a dist...,...,,,,,,,A,,,B
4,Costa Rica,Robot Overlords,"Ben Kingsley, Gillian Anderson, Callan McAuliffe","Portuguese (Cleared), Spanish, English",Jon Wright,"Action, Adventure, Sci-Fi",HD File,Film,"Portuguese, Spanish",Earth has been conquered by Robots from a dist...,...,,,,,,,A,,,B
