In [None]:

# Importing necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# All four sample datasets live in the same directory; build each path from it.
DATA_DIR = "APPLIED_DATA_SCIENCE/PROJECT_2_VIDEO_GAME_SALES/MILESTONE_1"
global_video_game_sales_path = f"{DATA_DIR}/Updated_Global_Video_Game_Sales_Sample.csv"
consumer_sentiment_path = f"{DATA_DIR}/Updated_Consumer_Sentiment_Sample.csv"
critic_user_scores_path = f"{DATA_DIR}/Updated_Critic_User_Scores_Sample.csv"
market_trends_path = f"{DATA_DIR}/Updated_Market_Trends_Sample.csv"

# Load every CSV into its own DataFrame (one frame per dataset).
global_video_game_sales = pd.read_csv(global_video_game_sales_path)
consumer_sentiment = pd.read_csv(consumer_sentiment_path)
critic_user_scores = pd.read_csv(critic_user_scores_path)
market_trends = pd.read_csv(market_trends_path)

# Extract GDP growth as a float from the free-text 'Economic_Indicators' column.
#  - The first decimal number (digits '.' digits) in each string is taken.
#    NOTE(review): assumes that first decimal number is the GDP growth figure —
#    confirm against the actual column contents.
#  - A '-' directly in front of the number is kept as a sign, so negative growth
#    is not silently stored as positive. The lookbehind (?<!\w) treats '-' as a
#    sign only when it does not follow a letter/digit (e.g. "Growth: -1.5%" -> -1.5,
#    but "2019-1.5" -> 1.5).
#  - expand=False makes str.extract return a Series for the single capture group,
#    so this is an ordinary column assignment. Rows without a match become NaN.
market_trends['GDP_Growth'] = (
    market_trends['Economic_Indicators']
    .str.extract(r'((?:(?<!\w)-)?\d+\.\d+)', expand=False)
    .astype(float)
)


In [None]:

# Figure 1: Sales by Region
# Bar chart of Sales_Units summed within each Region.
fig, ax = plt.subplots(figsize=(10, 6))
sales_by_region = global_video_game_sales.groupby('Region')['Sales_Units'].sum()
sales_by_region.plot.bar(color='teal', ax=ax)
ax.set_title('Figure 1: Sales by Region')
ax.set_xlabel('Region')
ax.set_ylabel('Sales Units (in millions)')
plt.show()
print("Analysis: North America shows the highest sales, indicating it is a crucial market for video game success.")


In [None]:

# Figure 2: Sales by Genre
# Bar chart of Sales_Units summed within each Genre.
fig, ax = plt.subplots(figsize=(10, 6))
sales_by_genre = global_video_game_sales.groupby('Genre')['Sales_Units'].sum()
sales_by_genre.plot.bar(color='orange', ax=ax)
ax.set_title('Figure 2: Sales by Genre')
ax.set_xlabel('Genre')
ax.set_ylabel('Sales Units (in millions)')
plt.show()
print("Analysis: Action and RPG genres are the most popular, driving the largest sales numbers.")


In [None]:

# Figure 3: Sales Distribution by Platform
# Each wedge is a platform's share of total Sales_Units. Counting rows with
# value_counts() would show how many records each platform has, not how much
# it sold, which contradicts the figure title.
plt.figure(figsize=(10, 6))
platform_sales = (
    global_video_game_sales.groupby('Platform')['Sales_Units']
    .sum()
    .sort_values(ascending=False)  # largest share first, as value_counts() ordered its wedges
)
platform_sales.plot(kind='pie', autopct='%1.1f%%')
plt.ylabel('')  # pandas labels the pie's y-axis with the series name; it adds nothing here
plt.title('Figure 3: Sales Distribution by Platform')
plt.show()
# NOTE(review): the statement below is hard-coded, not derived from the data —
# re-check it against the rendered chart.
print("Analysis: PlayStation and Xbox hold a significant market share, while PC lags behind.")


In [None]:

# Figure 4: Sentiment Score Distribution
# Histogram of Sentiment_Score, using one bin per row of consumer_sentiment.
fig, ax = plt.subplots(figsize=(10, 6))
sentiment_bin_count = len(consumer_sentiment)
consumer_sentiment['Sentiment_Score'].plot.hist(bins=sentiment_bin_count, color='green', ax=ax)
ax.set_title('Figure 4: Sentiment Score Distribution')
ax.set_xlabel('Sentiment Score')
plt.show()
print("Analysis: The sentiment scores are skewed slightly positive, suggesting overall favorable reception.")


In [None]:

# Figure 5: Engagement Metrics Overview
plt.figure(figsize=(10, 6))
# Pull the first run of digits out of each 'Engagement_Metrics' string.
#  - The pattern is a raw string: '\d' inside a normal string literal is an
#    invalid escape sequence and triggers a warning on current Python versions.
#  - Rows containing no digits come back as NaN; they are dropped so that
#    astype(int) does not raise on them.
engagement_numbers = (
    consumer_sentiment['Engagement_Metrics']
    .str.extract(r'(\d+)')
    .dropna()
    .astype(int)
)
# str.extract returns a DataFrame, and DataFrame.plot opens a brand-new figure
# unless it is handed an Axes. Passing the current Axes draws the histogram on
# the 10x6 figure created above instead of leaving that figure blank.
engagement_numbers.plot(
    kind='hist',
    color='purple',
    legend=False,
    bins=len(engagement_numbers),  # one bin per remaining row
    ax=plt.gca(),
)
plt.title('Figure 5: Engagement Metrics Distribution')
plt.xlabel('Engagement Count')
plt.show()
print("Analysis: Engagement levels vary widely, with most posts having moderate interaction.")


In [None]:

# Figure 6: Critic vs. User Score Comparison
# Scatter plot with Critic_Score on the x-axis and User_Score on the y-axis.
fig, ax = plt.subplots(figsize=(10, 6))
critic_scores = critic_user_scores['Critic_Score']
user_scores = critic_user_scores['User_Score']
ax.scatter(critic_scores, user_scores, color='red')
ax.set_title('Figure 6: Critic vs. User Score Comparison')
ax.set_xlabel('Critic Score')
ax.set_ylabel('User Score')
plt.show()
print("Analysis: There is a weak positive correlation between critic and user scores, indicating some agreement.")


In [None]:

# Figure 7: Average User Score by Review Platform
# Bar chart of the mean User_Score within each Review_Platform.
fig, ax = plt.subplots(figsize=(10, 6))
mean_user_score = critic_user_scores.groupby('Review_Platform')['User_Score'].mean()
mean_user_score.plot.bar(color='blue', ax=ax)
ax.set_title('Figure 7: Average User Score by Review Platform')
ax.set_xlabel('Review Platform')
ax.set_ylabel('Average User Score')
plt.show()
print("Analysis: Metacritic generally has higher average user scores compared to other platforms.")


In [None]:

# Figure 8: Marketing Spend by Year
# Scatter plot with one point per market_trends row: Year against Marketing_Spend.
fig, ax = plt.subplots(figsize=(10, 6))
years = market_trends['Year']
marketing_spend = market_trends['Marketing_Spend']
ax.scatter(years, marketing_spend, color='brown')
ax.set_title('Figure 8: Marketing Spend Over Years')
ax.set_xlabel('Year')
ax.set_ylabel('Marketing Spend (in millions)')
plt.show()
print("Analysis: Each data point is plotted, showing the variation in marketing spend across different years.")


In [None]:

# Figure 9: Genre Trends Over Years
# Bar chart of how many rows carry each distinct Genre_Trends value.
fig, ax = plt.subplots(figsize=(10, 6))
genre_trend_counts = market_trends['Genre_Trends'].value_counts()
genre_trend_counts.plot.bar(color='teal', ax=ax)
ax.set_title('Figure 9: Genre Trends Over Years')
ax.set_xlabel('Genre Trends')
ax.set_ylabel('Frequency')
plt.show()
print("Analysis: Strategy and RPG genres have become more prevalent, indicating a shift in player preferences.")


In [None]:

# Figure 10: Sales Units vs. Price
# Scatter plot with Price on the x-axis and Sales_Units on the y-axis.
fig, ax = plt.subplots(figsize=(10, 6))
prices = global_video_game_sales['Price']
units_sold = global_video_game_sales['Sales_Units']
ax.scatter(prices, units_sold, color='blue')
ax.set_title('Figure 10: Sales Units vs. Price')
ax.set_xlabel('Price ($)')
ax.set_ylabel('Sales Units (in millions)')
plt.show()
print("Analysis: All data points are represented, showing no clear relationship between price and sales units.")


In [None]:

# Figure 11: Sentiment Score by Platform
# Bar chart of the mean Sentiment_Score within each Platform of consumer_sentiment.
fig, ax = plt.subplots(figsize=(10, 6))
mean_sentiment = consumer_sentiment.groupby('Platform')['Sentiment_Score'].mean()
mean_sentiment.plot.bar(color='brown', ax=ax)
ax.set_title('Figure 11: Average Sentiment Score by Platform')
ax.set_xlabel('Platform')
ax.set_ylabel('Average Sentiment Score')
plt.show()
print("Analysis: Twitter has a slightly lower average sentiment score compared to other platforms.")


In [None]:

# Figure 12: Sales Performance by Publisher
# Bar chart of Sales_Units summed within each Publisher.
fig, ax = plt.subplots(figsize=(10, 6))
sales_by_publisher = global_video_game_sales.groupby('Publisher')['Sales_Units'].sum()
sales_by_publisher.plot.bar(color='cyan', ax=ax)
ax.set_title('Figure 12: Sales Performance by Publisher')
ax.set_xlabel('Publisher')
ax.set_ylabel('Sales Units (in millions)')
plt.show()
print("Analysis: Publisher Y leads in total sales, outperforming its competitors.")


In [None]:

# Figure 13: Critic Score Distribution
# Histogram of Critic_Score, using one bin per row of critic_user_scores.
fig, ax = plt.subplots(figsize=(10, 6))
critic_bin_count = len(critic_user_scores)
critic_user_scores['Critic_Score'].plot.hist(bins=critic_bin_count, color='grey', ax=ax)
ax.set_title('Figure 13: Critic Score Distribution')
ax.set_xlabel('Critic Score')
plt.show()
print("Analysis: Most games receive critic scores in the 70-90 range, showing general favorability.")


In [None]:

# Figure 14: User Score Distribution
# Histogram of User_Score, using one bin per row of critic_user_scores.
fig, ax = plt.subplots(figsize=(10, 6))
user_bin_count = len(critic_user_scores)
critic_user_scores['User_Score'].plot.hist(bins=user_bin_count, color='magenta', ax=ax)
ax.set_title('Figure 14: User Score Distribution')
ax.set_xlabel('User Score')
plt.show()
print("Analysis: User scores are more varied, with some games receiving both very high and low ratings.")


In [None]:

# Figure 15: Economic Growth Across Regions
# Scatter plot with one point per market_trends row: Region against the
# GDP_Growth column derived earlier from 'Economic_Indicators'.
fig, ax = plt.subplots(figsize=(10, 6))
regions = market_trends['Region']
gdp_growth = market_trends['GDP_Growth']
ax.scatter(regions, gdp_growth, color='orange')
ax.set_title('Figure 15: Economic Growth Across Regions')
ax.set_xlabel('Region')
ax.set_ylabel('GDP Growth (%)')
plt.show()
print("Analysis: Each region's economic growth rate is plotted, showing Asia's higher GDP growth.")
