In [261]:
# basic operations
import numpy as np
import pandas as pd
from scipy.stats import skew
pd.options.display.max_rows = 999

# Plotly
import plotly.express as px
import plotly.graph_objects as go

# SQL
# Load the query helpers from Query_Function.py.
# -i makes IPython execute the script in this notebook's own namespace instead of
# a fresh one, so the script can read names defined above and whatever it defines
# is available to later cells.
# NOTE(review): none of the cells shown below appear to use anything from it -- confirm
# it is still needed.
%run -i "Query_Function.py"

In [236]:
# Load the movie revenue table; the 'Date' column is parsed to datetimes on read
df = pd.read_csv(
    'movie_revenue_new_by_day.csv',
    parse_dates=['Date'],
)

In [237]:
# Set variables: the scope of the analysis
#   country_in  -> values matched against 'Country_of_origin'
#   country_out -> values matched against 'Country_of_market'
country_in, country_out = ['FR'], ['United States']

In [238]:
# Filter by country: keep rows whose origin is in country_in AND whose market is
# in country_out, then index the result by 'Date' (the monthly pd.Grouper used in
# the next step has no key, so it buckets on this index).
# NOTE: df is overwritten and 'Date' leaves the columns, so this cell cannot be
# re-run on its own output -- reload the CSV first.
from_origin = df['Country_of_origin'].isin(country_in)
to_market = df['Country_of_market'].isin(country_out)
mask = from_origin & to_market
df = df.loc[mask].set_index('Date')

In [239]:
# Group by month: total royalties per movie / origin / market and calendar month.
# pd.Grouper is given no key, so it buckets on the frame's index ('Date').
# NOTE(review): recent pandas releases deprecate the 'M' alias in favour of 'ME' --
# confirm against the pandas version in use before changing it.
df2 = (
    df.groupby(['Movie_id', 'Title', 'Country_of_origin', 'Country_of_market', pd.Grouper(freq='1M')])[['Royalties']]
      .sum()
      .reset_index()
)

In [279]:
# Calculate skewness of each title's monthly royalties, and how many monthly
# observations the title has. At this stage 'One-shot' holds the raw skewness.
# NOTE(review): grouping on 'Title' alone pools every Movie_id / country pair that
# shares a title -- confirm titles are unique enough for that.
royalties_by_title = df2.groupby('Title')['Royalties']
count = royalties_by_title.count()
df3 = (
    royalties_by_title.apply(skew)
    .rename('One-shot')
    .reset_index()
)
# Both results come from the same groupby, so after dropping the Title index the
# counts line up row-for-row with df3's default integer index.
df3['Count'] = count.reset_index(drop=True)

In [280]:
# Working hypothesis:
#   high skewness -> one-shot movie?
#   low skewness  -> recurrent movie?
# A skewness of 2 is used as the (tentative) cut-off between the two.
SKEW_THRESHOLD = 2

# Distance of each title's skewness from the cut-off (>= 0 means "one-shot").
skew_excess = df3['One-shot'] - SKEW_THRESHOLD

# Tag on the unrounded value so titles just below the cut-off are not rounded
# up into the one-shot group. Comparisons against NaN are False, so a title whose
# skewness is undefined is NOT tagged as one-shot.
df3['One-shot-tag'] = skew_excess.ge(0)

# Round only for display, after the decision has been made.
# NOTE: 'One-shot' is overwritten with the shifted value, so re-running this cell
# without first re-running the skewness cell subtracts the threshold a second time.
df3['One-shot'] = skew_excess.round(2)
df3

Unnamed: 0,Title,One-shot,Count,One-shot-tag
0,1 chance sur 2,-0.34,21,False
1,15 ans et demi,0.41,58,True
2,20 ans d'écart,-2.0,2,False
3,600 kilos d'or pur,1.12,52,True
4,99 francs,0.65,105,True
5,A normal life. Chronicle of a sumo wrestler,2.83,61,True
6,Agathe Cléry,3.27,90,True
7,Agents Secrets,0.29,12,True
8,Agnès Letestu: L'apogée d'une étoile,-1.21,16,False
9,All of Us Guinea-pigs Now?,0.75,28,True


In [285]:
# The share of titles carrying each One-shot-tag value.
# This can be combined with the tag analysis to see which tag is more likely to
# result in a recurrent / one-shot movie.
tag_counts = df3['One-shot-tag'].value_counts()
tag_counts.div(len(df3))

False    0.579618
True     0.420382
Name: One-shot-tag, dtype: float64

In [281]:
# Look up the performance: monthly royalty rows of a single title
movie = 'Mais qui a re-tué Pamela Rose ?'
is_selected = df2['Title'].eq(movie)
df_plot = df2.loc[is_selected]
df_plot

Unnamed: 0,Movie_id,Title,Country_of_origin,Country_of_market,Date,Royalties
6831,1252,Mais qui a re-tué Pamela Rose ?,FR,United States,2013-08-31,5.38
6832,1252,Mais qui a re-tué Pamela Rose ?,FR,United States,2013-09-30,49.5
6833,1252,Mais qui a re-tué Pamela Rose ?,FR,United States,2013-10-31,59.0
6834,1252,Mais qui a re-tué Pamela Rose ?,FR,United States,2013-11-30,44.5
6835,1252,Mais qui a re-tué Pamela Rose ?,FR,United States,2013-12-31,39.5
6836,1252,Mais qui a re-tué Pamela Rose ?,FR,United States,2014-01-31,29.5
6837,1252,Mais qui a re-tué Pamela Rose ?,FR,United States,2014-02-28,14.0
6838,1252,Mais qui a re-tué Pamela Rose ?,FR,United States,2014-03-31,21.5
6839,1252,Mais qui a re-tué Pamela Rose ?,FR,United States,2014-04-30,17.0
6840,1252,Mais qui a re-tué Pamela Rose ?,FR,United States,2014-05-31,61.0


In [282]:
# Visualize the performance: royalties of the selected movie over time
plot_title = 'Revenue of {}'.format(movie)
fig = px.line(df_plot, x="Date", y="Royalties", title=plot_title)
fig.show()