In [1]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objs as go


In [29]:
# Load the raw CSV; low_memory=False makes pandas read the file in one pass
# instead of chunk-wise type inference.
data_path = '../data.csv'
df = pd.read_csv(data_path, low_memory=False)
df.head()

Unnamed: 0,tconst,Title,Season,Episode,imgCount,averageRating,numVotes,startYear,Media,Device
0,tt7991508,Finestkind (2023),-1.0,-1.0,18,6.4,173,2023,Movie,iPhone
1,tt6722400,Fast Charlie (2023),-1.0,-1.0,11,9.4,96,2023,Movie,iPhone
2,tt13651794,May December (2023),-1.0,-1.0,9,7.4,1345,2023,Movie,iPhone
3,tt19890316,What Happens Later (2023),-1.0,-1.0,25,7.6,14,2023,Movie,iPhone
4,tt15744298,Freelance (2023),-1.0,-1.0,106,5.3,374,2023,Movie,iPhone


In [3]:
# Collapse the raw rows to one row per title, then keep recent titles whose
# mean imgCount exceeds 1.

# Columns that are grouped on; rows sharing all of these values become one row.
same_cols = ['tconst', 'Title', 'numVotes', 'startYear', 'Media']

# Per-group aggregates. Only the columns that survive into resolved_data are
# computed; 'Device' and 'Season' counts used to be computed here and then
# dropped straight away, so they are no longer aggregated at all.
agg_dict = {
    'averageRating': 'mean',
    'Episode': 'count',
    'imgCount': 'mean',
}

titles_all = df.groupby(same_cols).agg(agg_dict).reset_index()
print('total titles:', titles_all.shape[0])

titles_recent = titles_all[titles_all['startYear'] > 2019]
print('titles (2020 or later):', titles_recent.shape[0])

# The threshold is applied to the per-title *mean* of imgCount (a float).
titles_filtered = titles_recent[titles_recent['imgCount'] > 1]
print('titles (2020 or later w/ more than 2+ placements):', titles_filtered.shape[0])

# String copy of startYear so plots can treat the year as a discrete category.
# .assign() returns a new frame, so nothing is written into a filtered slice
# (avoids pandas' SettingWithCopyWarning).
resolved_data = titles_filtered.assign(
    year=titles_filtered['startYear'].astype('string')
)

total titles: 619
titles (2020 or later): 486
titles (2020 or later w/ more than 2+ placements): 334


In [4]:
# Scatter of averageRating against imgCount for every row of resolved_data,
# with markers coloured by startYear and the title shown on hover.
fig = go.Figure()

fig.add_trace(go.Scatter(
    x=resolved_data.imgCount,
    y=resolved_data.averageRating,
    mode="markers",
    marker=dict(
        color=resolved_data.startYear,
        # Without a colorbar the year encoding promised by the title cannot be
        # read off the chart; go.Scatter hides it unless asked.
        showscale=True,
        colorbar=dict(title=dict(text='Year')),
    ),
    text=resolved_data.Title,
))

# The x-axis title and log scale are set once here; previously the title was
# set in update_layout and then overwritten by a later update_xaxes call.
fig.update_layout(
    title='Average Rating vs Number of Apple Product Placements, by Year',
    xaxis_title='Number of Product Placements (logarithmic scale)',
    xaxis_type='log',
    xaxis_zeroline=False,
    yaxis_title='Rating',
    yaxis_zeroline=False,
)

fig.show()

In [5]:
# Continuous-colour version of the rating vs. placements scatter: the numeric
# startYear column drives a colour gradient.
fig3 = px.scatter(
    resolved_data,
    x='imgCount',
    y='averageRating',
    color='startYear',
    hover_name='Title',
    log_x=True,
    color_continuous_scale=['gold', 'orange', 'red', 'purple', 'magenta', 'blue', 'cyan'],
    labels={
        'imgCount': 'Number of Product Placements (logarithmic scale)',
        'averageRating': 'Average Title Rating',
        # The label must be keyed on the column actually plotted; the previous
        # 'year' key (and a category_orders entry for 'year') referred to a
        # column this figure does not use, so the colorbar read 'startYear'.
        'startYear': 'Year',
    },
)

fig3.show()

In [6]:
# Discrete-colour version of the rating vs. placements scatter: the string
# 'year' column gives one legend entry per year, listed in sorted order.
year_order = resolved_data['year'].sort_values().unique()
axis_labels = {
    'imgCount': 'Number of Product Placements (logarithmic scale)',
    'averageRating': 'Average Title Rating',
    'year': 'Year',
}

fig2 = px.scatter(
    data_frame=resolved_data,
    x='imgCount',
    y='averageRating',
    color='year',
    hover_name='Title',
    log_x=True,
    category_orders={'year': year_order},
    labels=axis_labels,
)

fig2.show()


In [7]:
# Summary statistics of startYear over the filtered titles.
start_years = resolved_data['startYear']
start_years.describe()

count     334.000000
mean     2021.892216
std         0.998677
min      2020.000000
25%      2021.000000
50%      2022.000000
75%      2023.000000
max      2023.000000
Name: startYear, dtype: float64

In [8]:
# Show the dtype of every column in resolved_data.
column_dtypes = resolved_data.dtypes
display(column_dtypes)

tconst                   object
Title                    object
numVotes                  int64
startYear                 int64
Media                    object
averageRating           float64
Episode                   int64
imgCount                float64
year             string[python]
dtype: object

In [9]:
# Histogram of how many rows of resolved_data fall in each year, with the
# bars laid out in sorted year order.
year_levels = resolved_data['year'].sort_values().unique()

fig4 = px.histogram(
    data_frame=resolved_data,
    x='year',
    category_orders={'year': year_levels},
)
fig4.show()

In [30]:
# Pie chart of how many rows of df carry each Device value.
device_counts = df.groupby(['Device'])['Device'].count().reset_index(name='count')

fig5 = px.pie(
    data_frame=device_counts,
    names='Device',
    values='count',
    title='Apple Products Placed in Titles, Grouped by Device',
)
fig5.show()

In [11]:
# Number of rows per Device value, as a two-column frame (Device, count).
rows_by_device = df.groupby(['Device'])
df2 = rows_by_device['Device'].count().reset_index(name='count')
df2.head()

Unnamed: 0,Device,count
0,AirPods,35
1,Apple Watch,227
2,MacBook,1755
3,iMac,573
4,iPad,361


In [12]:
# Aggregate the raw rows per (startYear, Device) pair.
same_cols2 = ['startYear', 'Device']
agg_dict2 = {
    'imgCount': 'count',  # add radio button to swap this b/w count, mean, sum
    'averageRating': 'mean',
}

# Drop the unused columns and aggregate in one chain. Previously the result of
# the drop was assigned and then overwritten by a groupby on `df` itself, so
# the drop never took effect. The aggregated output is the same either way
# because agg_dict2 only touches imgCount and averageRating.
resolved_data2 = (
    df.drop(columns=['tconst', 'Title', 'numVotes', 'Media', 'Season', 'Episode'])
    .groupby(same_cols2)
    .agg(agg_dict2)
    .reset_index()
)


In [13]:
# Preview the first 30 rows of the per-year, per-device aggregate.
resolved_data2.iloc[:30]

Unnamed: 0,startYear,Device,imgCount,averageRating
0,2001,MacBook,1,6.2
1,2003,iMac,1,7.2
2,2003,iPad,1,7.8
3,2003,iPhone,5,7.92
4,2005,iMac,1,7.2
5,2005,iPad,4,7.2
6,2005,iPhone,3,7.466667
7,2006,MacBook,3,6.0
8,2006,iMac,1,5.0
9,2006,iPhone,3,6.133333


In [14]:
# Line chart of resolved_data2: one line per Device, startYear on the x-axis.
# NOTE(review): the 'imgCount' column of resolved_data2 holds whatever
# aggregate the cell that builds it applies (a row count at the time of
# writing), so the y-axis is labelled as a record count — update the label
# and title if that aggregate is switched to mean or sum.
# NOTE: this rebinds `fig5`, which is also used for the device pie chart.
fig5 = px.line(
    resolved_data2,
    x='startYear',
    y='imgCount',
    color='Device',
    markers=True,
    title='Apple Product Placement Records per Year, Grouped by Device',
    labels={
        'startYear': 'Year',
        'imgCount': 'Number of Records',
    },
)
fig5.show()

In [15]:
# Preview the first five rows of the raw frame.
df.head(n=5)

Unnamed: 0,tconst,Title,Season,Episode,imgCount,averageRating,numVotes,startYear,Media,Device
0,tt7991508,Finestkind (2023),-1.0,-1.0,18,6.4,173,2023,Movie,iPhone
1,tt6722400,Fast Charlie (2023),-1.0,-1.0,11,9.4,96,2023,Movie,iPhone
2,tt13651794,May December (2023),-1.0,-1.0,9,7.4,1345,2023,Movie,iPhone
3,tt19890316,What Happens Later (2023),-1.0,-1.0,25,7.6,14,2023,Movie,iPhone
4,tt15744298,Freelance (2023),-1.0,-1.0,106,5.3,374,2023,Movie,iPhone


In [28]:
# Distinct startYear values present in the raw frame, in ascending order.
years = np.sort(df['startYear'].unique())
print(years)

[2001 2003 2005 2006 2007 2008 2009 2011 2012 2013 2014 2015 2016 2017
 2018 2019 2020 2021 2022 2023]
