In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import re

# plotly
import plotly.graph_objects as go
import plotly.express as px 

![Poster](https://california-times-brightspot.s3.amazonaws.com/35/37/67b311bb4dfebd84cd54033f010c/v2-frs-040721-tp-00672.jpg)

In [None]:
# Location of the episode table inside the Kaggle input directory.
FRIENDS_CSV = '/kaggle/input/friends-episodes/friends.csv'

# Read the CSV into the frame every later cell works on, then preview it.
data = pd.read_csv(FRIENDS_CSV)
data.head()

In [None]:
# Show one 'Rating/Share' cell (row label 4) to inspect how the column is stored.
data.at[4, 'Rating/Share']

In [None]:
def _rating_share_ratio(value):
    """Turn one 'Rating/Share' cell into a single number.

    A string such as 'a/b' becomes ``round(float(a) / float(b), 4)``.
    Anything that is not a string (an already-converted float from an
    earlier run of this cell, or a missing value) is returned unchanged,
    so re-running the cell is a no-op instead of an AttributeError.
    """
    if not isinstance(value, str):
        return value
    parts = value.split('/')
    return round(float(parts[0]) / float(parts[1]), 4)


# Build the whole column in one assignment instead of chained indexing
# (data[col][i]) plus per-row .loc writes; this also no longer depends on the
# frame having a default 0..n-1 index.
data['Rating/Share'] = pd.to_numeric(
    data['Rating/Share'].map(_rating_share_ratio),
    errors='coerce',
)

In [None]:
# Preview the first five rows of the frame again.
data.head(n=5)

# Rating and Views ✨

In [None]:
# Pull out the two columns being compared.
rating_share = data['Rating/Share']
us_viewership = data['US Viewership (in millions)']

# Scatter of viewership against Rating/Share; the y axis is log-scaled.
fig = px.scatter(x=rating_share, y=us_viewership, title='Rating and Views Relationship', log_y=True)
fig.update_xaxes(title_text='Rating/Share')
fig.update_yaxes(title_text='US Viewership in Millions')
fig.show()

### The Rating/Share rises with the viewership: the two are positively related

# Best Season

In [None]:
# Per-season average of the numeric columns.
# numeric_only=True is needed because the frame also holds text columns
# (Title, Director, Writer, Summary, ...): pandas >= 2.0 raises a TypeError
# when asked to average those, while older versions silently dropped them.
data_season = data.groupby(by=['Season']).mean(numeric_only=True)

# One point per season, labelled with the average rounded to 3 decimals.
fig = px.line(
    x=data_season.index,
    y=data_season['Rating/Share'],
    text=data_season['Rating/Share'].round(3),
    title='Rating/Share of Each Season'
)

fig.update_traces(textposition='top center')

# Linear ticks starting at 1 so that every season number gets its own tick.
fig.update_layout(
    xaxis = dict(tickmode = 'linear', tick0 = 1)
)

fig.update_yaxes(title_text='Rating/Share')
fig.update_xaxes(title_text='#Season')
fig.show()

### The first season took off with the highest rating

# Busiest Director 🎬

In [None]:
# Number of rows (episodes) credited to each director, indexed by director.
data_dir_size = data.groupby('Director').size()

# Directors go on the y axis and their episode counts on the x axis.
fig = px.histogram(
    y=data_dir_size.index,
    x=data_dir_size,
    height=700,
    title='Number of Episodes directed by each Director',
)
fig.update_xaxes(title_text='No. of Episodes Directed')
fig.update_yaxes(title_text='Director')
fig.show()

# Most Viewed Director

In [None]:
# Per-director average of the numeric columns.
# numeric_only=True is needed because the frame also holds text columns
# (Title, Writer, Summary, ...): pandas >= 2.0 raises a TypeError when asked
# to average those, while older versions silently dropped them.
data_dir = data.groupby(by=['Director']).mean(numeric_only=True)

# Directors on the y axis, their mean US viewership on the x axis.
fig = px.histogram(
    x=data_dir['US Viewership (in millions)'],
    y=data_dir.index,
    height=700,
    title='Average Viewership Gained by Each Director'
)
fig.update_yaxes(title_text='Director')
fig.update_xaxes(title_text='Average Viewership (in millions)')
fig.show()

# Writer ✍

In [None]:
# get all unique writers
# Separators that can appear between credited names. `\band ` (instead of a
# bare 'and ') stops a name that merely ends in "and" from being cut in two.
WRITER_SEPARATORS = r'\band |Story by |Teleplay by |, | & '

# A dict is used as an insertion-ordered set: first-seen order is kept and
# each membership check is O(1) instead of scanning a growing list.
unique_writers = {}
for credit in data['Writer']:
    for name in re.split(WRITER_SEPARATORS, credit):
        # Splitting "A and B" leaves "A " with a trailing space; without the
        # strip that would be stored as a different writer from "A".
        name = name.strip()
        if name:
            unique_writers.setdefault(name, None)

writers = list(unique_writers)

In [None]:
# get viewership for each writer
# Maps each writer name to the list of column-8 values of every row whose
# column-3 text contains that name (substring match).
# NOTE(review): positional columns 3 and 8 are presumably 'Writer' and
# 'US Viewership (in millions)' — confirm against the CSV header.
writers_viewership = {}
n_rows = len(data)
for name in writers:
    matched = writers_viewership.setdefault(name, [])
    for row in range(n_rows):
        if name in data.iloc[row, 3]:
            matched.append(data.iloc[row, 8])

In [None]:
# mean of viewership and size
# One array per writer, in the same order as the dict's keys.
viewership_arrays = [np.array(values) for values in writers_viewership.values()]

# Number of matched episodes per writer.
no_of_episodes_writer = [arr.shape[0] for arr in viewership_arrays]
# Mean viewership per writer, rounded to 3 decimals.
avg_viewership_writer = [round(arr.mean(), 3) for arr in viewership_arrays]

# Busiest Writer

In [None]:
# Writer names in dict order, which is the order the counts were built in.
writer_names = list(writers_viewership)

# One bar per writer showing how many episodes matched that writer.
fig = px.histogram(
    y=no_of_episodes_writer,
    x=writer_names,
    title='Number of Episodes written by each Writer',
)
fig.update_xaxes(title_text='Writer')
fig.update_yaxes(title_text='No. of Episodes')
fig.show()

### Ted Cohen and Andrew Reich have written the most episodes, with 25 episodes each

# Most Viewed Writer

In [None]:
# Writer names in dict order, which is the order the averages were built in.
writer_names = list(writers_viewership)

# One bar per writer showing that writer's mean viewership.
fig = px.histogram(
    y=avg_viewership_writer,
    x=writer_names,
    title='Average Viewership Gained by Each Writer',
    width=1000,
)
fig.update_xaxes(title_text='Writer')
fig.update_yaxes(title_text='Average Viewership (in millions)')
fig.show()

# Summary Mentions

In [None]:
# The six names to look for in the episode summaries.
friend_names = ['Monica', 'Chandler', 'Ross', 'Rachel', 'Joey', 'Phoebe']

# For each name, total its (case-sensitive, non-overlapping) occurrences
# across every summary.
friends = {
    name: sum(summary.count(name) for summary in data['Summary'])
    for name in friend_names
}

In [None]:
# Pie slice per friend, sized by mention count and labelled with name + percent.
mention_pie = go.Pie(
    labels=[*friends],
    values=[*friends.values()],
    textinfo='label+percent',
)
fig = go.Figure(data=[mention_pie])

# Larger label text and a thin black outline around every slice; the call
# returns the figure, so as the cell's last expression it is also displayed.
fig.update_traces(textfont_size=15, marker={'line': {'color': '#000000', 'width': 0.5}})

### Rachel seems to steal the show ❤ 

# Rating/Share over the years

In [None]:
# Parse the 'Date Aired' column into datetimes, replacing the column in place.
# dayfirst=True asks pandas to read an ambiguous date such as 01/02/1995 as
# day/month/year rather than month/day/year.
data['Date Aired'] = pd.to_datetime(data['Date Aired'], dayfirst=True)

In [None]:
# Rating/Share of every episode plotted against its air date.
rating_timeline = px.line(
    x=data['Date Aired'],
    y=data['Rating/Share'],
    title='Rating Each Episode',
)
rating_timeline.update_xaxes(title_text='Date')
rating_timeline.update_yaxes(title_text='Rating/Share')

# Bare expression so the notebook renders the figure as the cell output.
rating_timeline

# Top 10 Episodes by Rating/Share

In [None]:
# Column used both to rank the episodes and to size/colour/label the bars.
by = 'Rating/Share'

# The ten rows with the largest value in that column, title and value only.
top_episodes = data.nlargest(10, by)[['Title', by]]

fig = px.bar(top_episodes, x='Title', y=by, color=by, text=by, height=600)
# Black bar outlines, with the value labels drawn outside the bars.
fig.update_traces(
    marker={'line': {'color': '#000000', 'width': 2}},
    textposition='outside',
)
# Centred, enlarged title.
fig.update_layout(title_text=f'Top 10 Episodes by {by}', title_x=0.5, title_font={'size': 30})
fig.show()

# Top 10 Episodes by Viewership

In [None]:
# Column used both to rank the episodes and to size/colour/label the bars.
by = 'US Viewership (in millions)'

# The ten rows with the largest value in that column, title and value only.
top_episodes = data.nlargest(10, by)[['Title', by]]

fig = px.bar(top_episodes, x='Title', y=by, color=by, text=by, height=600)
# Black bar outlines, with the value labels drawn outside the bars.
fig.update_traces(
    marker={'line': {'color': '#000000', 'width': 2}},
    textposition='outside',
)
# Centred, enlarged title.
fig.update_layout(title_text=f'Top 10 Episodes by {by}', title_x=0.5, title_font={'size': 30})
fig.show()