# Creating figures

### Imports

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

### Load data

In [None]:
# Read the pickled dataset, cast the audienceScore and tomatoMeter columns to
# float, and keep only the rows where all three rating columns are present
df = (
    pd.read_pickle('full_w_sentiment.pkl')
    .astype({'audienceScore': float, 'tomatoMeter': float})
    .dropna(subset = ['audienceScore', 'tomatoMeter', 'averageRating'])
)

# assign() evaluates its keyword arguments in order, so the natural log is
# taken from the raw box office before that column is rescaled to millions
df = df.assign(
    logBoWorldwide = lambda frame: np.log(frame['boWorldwide']),
    boWorldwide = lambda frame: frame['boWorldwide'] / 10**6,
)

### Make histograms

In [None]:
# Use style
# Matplotlib 3.6 renamed its bundled seaborn styles to 'seaborn-v0_8-*' and
# later releases dropped the old names, so pick whichever name this
# installation actually provides instead of hard-coding one of them
deep_style = 'seaborn-v0_8-deep'
if deep_style not in plt.style.available:
    deep_style = 'seaborn-deep'
plt.style.use(deep_style)

# Make figure: two panels on the top row, one full-width panel below
fig = plt.figure(constrained_layout = True, figsize = (8, 6))
gs = fig.add_gridspec(2, 2)
ax1 = fig.add_subplot(gs[0, 0])
ax2 = fig.add_subplot(gs[0, 1])
ax3 = fig.add_subplot(gs[1, :])

# Make histograms
# Values outside each explicit range are left out of the histogram, so the
# box office panel only shows observations up to 800 (millions)
ax1.hist(df['tomatoMeter'], bins = 10, range = (0, 100))
ax2.hist(df['averageRating'], bins = 20, range = (0, 10))
ax3.hist(df['boWorldwide'], bins = 16, range = (0, 800))

# Set x-axis ticks (upper bounds are exclusive, hence one step past the range)
ax1.set_xticks(np.arange(0, 110, step = 10))
ax2.set_xticks(np.arange(0, 11, step = 1))
ax3.set_xticks(np.arange(0, 900, step = 100))

# Set titles
ax1.set_title('(A)  RT Tomatometer')
ax2.set_title('(B)  IMDb rating')
ax3.set_title('(C)  Worldwide box office')

# Set labels
ax3.set_xlabel('Millions USD')
for ax in (ax1, ax2, ax3):
    ax.set_ylabel('Frequency')

# Save figure
plt.savefig('histograms.pdf', bbox_inches = 'tight')

### Make scatterplots

In [None]:
# Make figure
fig, axes = plt.subplots(1, 3, figsize = (8, 3))

# Column plotted on the x-axis of every panel. Defined here so the cell runs
# on a fresh kernel instead of relying on a name left over from elsewhere.
x_var = 'sentimentBefore'

# Drop not available, separately per panel so that each regression keeps
# every row that has both of its own variables
df1 = df.dropna(subset = [x_var, 'tomatoMeter'])
df2 = df.dropna(subset = [x_var, 'averageRating'])
df3 = df.dropna(subset = [x_var, 'logBoWorldwide'])
# np.log would map a box office of zero to -inf, which dropna does not remove;
# keep only finite values so the fitted line is well defined
df3 = df3[np.isfinite(df3['logBoWorldwide'])]

# Make regression plots, each panel drawing from its own filtered frame
sns.regplot(x = df1[x_var], y = df1['tomatoMeter'], ax = axes[0], ci = None, scatter_kws={'alpha': 0.2})
sns.regplot(x = df2[x_var], y = df2['averageRating'], ax = axes[1], ci = None, scatter_kws={'alpha': 0.2})
sns.regplot(x = df3[x_var], y = df3['logBoWorldwide'], ax = axes[2], ci = None, scatter_kws={'alpha': 0.2})

# Set x-axis labels (replaces the column name seaborn would show by default)
for ax in axes:
    ax.set_xlabel('Sentiment before release')

# Set y-axis labels
axes[0].set_ylabel('RT Tomatometer')
axes[1].set_ylabel('IMDb rating')
axes[2].set_ylabel('Log worldwide box office')

# Set titles
axes[0].set_title('(A)  Correlation with \n RT Tomatometer')
axes[1].set_title('(B)  Correlation with \n IMDb rating')
axes[2].set_title('(C)  Correlation with \n box office')

# Set y-axis limits
axes[2].set_ylim([0, 25])

# Adjust spacing so the y-axis labels do not collide with the neighbouring panel
plt.subplots_adjust(wspace = 0.35)

# Save figure
plt.savefig('scatters.pdf', bbox_inches = 'tight')