In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Data Cleaning
# Load the game records, drop every row that has a missing value in any
# column, then force the playtime and price columns to numeric dtypes.
games_df = (
    pd.read_csv('../data/indie_horror_games_data.csv')
    .dropna()
    .astype({'average_forever': int, 'price': float})
)

# Derived columns:
#   reviews           - total review count (positive + negative)
#   estimated_revenue - reviews multiplied by price, used as a rough revenue proxy
# NOTE(review): the unit of `price` (currency units vs. cents) is not visible
# here - confirm against the CSV before reading revenue figures as money.
games_df = games_df.assign(
    reviews=lambda df: df['positive'] + df['negative'],
    estimated_revenue=lambda df: df['reviews'] * df['price'],
)

# Data Analysis
# Number of quantile bins used for both bucketings below (10 = deciles).
N_QUANTILES = 10

# Bucket every game by review count and by estimated revenue.
# labels=False stores 0-based integer bin codes (0 = lowest bin).
# duplicates='drop' merges bins whose edges coincide (e.g. many games sharing
# the same low review count) instead of raising
# "ValueError: Bin edges must be unique"; when that happens fewer than
# N_QUANTILES bins are produced.
games_df['review_quantile'] = pd.qcut(
    games_df['reviews'], N_QUANTILES, labels=False, duplicates='drop'
)
games_df['revenue_quantile'] = pd.qcut(
    games_df['estimated_revenue'], N_QUANTILES, labels=False, duplicates='drop'
)

# Mean and median of reviews, estimated revenue and price within each
# review-count bin.
review_stats = games_df.groupby('review_quantile').agg({
    'reviews': ['mean', 'median'],
    'estimated_revenue': ['mean', 'median'],
    'price': ['mean', 'median'],
})

# Flatten the (column, statistic) MultiIndex into '<statistic>_<column>' names
# (mean_reviews, median_reviews, ...). Deriving the names from the index keeps
# them in sync with the aggregation spec above.
review_stats.columns = [
    f'{stat}_{column}' for column, stat in review_stats.columns
]
review_stats = review_stats.reset_index()

# Data Visualization
# Dual-axis chart per review quantile:
#   left axis  (blue bars)  - mean review count
#   right axis (green line) - mean estimated revenue
fig, ax1 = plt.subplots(figsize=(12, 6))

reviews_color = 'tab:blue'
sns.barplot(
    x=review_stats['review_quantile'],
    y=review_stats['mean_reviews'],
    ax=ax1,
    color=reviews_color,
)
# Axis labels are set after plotting: seaborn can label the axes from the
# Series names ('review_quantile', 'mean_reviews'), which would replace labels
# assigned beforehand.
ax1.set_xlabel('Review Quantile')
ax1.set_ylabel('Average Reviews', color=reviews_color)
ax1.tick_params(axis='y', labelcolor=reviews_color)

# Second y-axis sharing the same x-axis, so revenue gets its own scale.
# NOTE(review): the bars are drawn at categorical positions 0..n-1 while the
# line uses the numeric quantile values; the two line up only while
# review_quantile holds consecutive 0-based codes - confirm if the binning
# changes.
ax2 = ax1.twinx()
revenue_color = 'tab:green'
sns.lineplot(
    x=review_stats['review_quantile'],
    y=review_stats['mean_estimated_revenue'],
    ax=ax2,
    color=revenue_color,
)
ax2.set_ylabel('Average Estimated Revenue', color=revenue_color)
ax2.tick_params(axis='y', labelcolor=revenue_color)

# Add the title on an explicit axes and only then run tight_layout(), so the
# layout pass reserves room for it.
ax1.set_title('Average Reviews and Estimated Revenue by Review Quantile')
fig.tight_layout()
plt.show()
