In [9]:
import pandas as pd
import seaborn as sms
import matplotlib.pyplot as plt
from scipy.stats import pearsonr, spearmanr

In [10]:
# Load the Steam reviews, keeping only the columns this notebook reads.
df = pd.read_csv(
    'steam_reviews.csv',
    usecols=[
        'app_name',
        'recommended',
        'author.playtime_forever',
        'author.playtime_at_review',
    ],
)

In [None]:
# Scatter of each review author's total playtime against the review score.
# NOTE: 'review_score' is not among the columns loaded from the CSV; it is
# added to df by the merge cell further down, so that cell has to be run
# before this one or the column lookup below raises KeyError.
fig, ax = plt.subplots(figsize=(10, 6))
sms.scatterplot(x=df['author.playtime_forever'],
                y=df['review_score'],
                alpha=0.6,
                ax=ax)
ax.set_title('Relationship between Playtime and Review Score')
ax.set_xlabel('Playtime (minutes)')
ax.set_ylabel('Review Score')
ax.grid(True)
# review_score is normalised to a 0-10 scale where it is defined (ratio / max
# ratio * 10), so the axis spans 0-10; a (0, 1) limit hides every score above 1.
ax.set_ylim(0, 10)
plt.show()

In [None]:
# Function to classify users based on playtime
def categorize_user_experience(playtime, new_user_max=20, experienced_max=100):
    """Return the experience tier for a given total playtime.

    Parameters
    ----------
    playtime : number or missing value
        Total playtime, in the same unit as the thresholds (the notebook's
        scatter plot labels this column as minutes).
    new_user_max : number, default 20
        Exclusive upper bound of the 'New User' tier.
    experienced_max : number, default 100
        Exclusive upper bound of the 'Experienced User' tier.

    Returns
    -------
    str or None
        'New User', 'Experienced User' or 'Veteran User'; None when
        ``playtime`` is missing (None / NaN / pd.NA).
    """
    # A missing playtime fails both `<` comparisons below and would otherwise
    # fall through to 'Veteran User'; report it as missing instead so that
    # groupby / hue-based plots leave those rows out.
    if pd.isna(playtime):
        return None
    if playtime < new_user_max:
        return 'New User'
    if playtime < experienced_max:
        return 'Experienced User'
    return 'Veteran User'

# Tag every review row with its author's experience tier, derived from total playtime
df['user_experience'] = df['author.playtime_forever'].map(categorize_user_experience)

In [None]:
# Rebuild review_score as defined earlier and add it as a column of df.
# review_score is computed once per application, so every review row of the
# same app receives the same value.

# Step 1: Calculate positive and total reviews for each application
review_counts = df.groupby('app_name').agg(
    positive_reviews=('recommended', 'sum'),   # Sum of recommended reviews (1s)
    total_reviews=('recommended', 'count')     # Total number of reviews (1s and 0s)
).reset_index()

# Step 2: Calculate negative reviews
review_counts['negative_reviews'] = review_counts['total_reviews'] - review_counts['positive_reviews']

# Step 3: Calculate the ratio of positive to negative reviews, avoiding division by zero
# NOTE(review): an app with no negative reviews gets a ratio of 1, i.e. it is
# scored the same as an app with equally many positive and negative reviews.
# Confirm this fallback is the intended definition.
review_counts['review_ratio'] = review_counts.apply(
    lambda row: row['positive_reviews'] / row['negative_reviews'] if row['negative_reviews'] > 0 else 1,
    axis=1
)

# Step 4: Define review_score: the ratio rescaled so the best app scores 10
review_counts['review_score'] = (review_counts['review_ratio'] / review_counts['review_ratio'].max()) * 10

# Step 5: Merge the review score into the original DataFrame.
# Any review_score column left over from a previous run of this cell is
# dropped first; otherwise the merge would produce review_score_x /
# review_score_y and later cells reading df['review_score'] would fail.
df = (
    df.drop(columns='review_score', errors='ignore')
      .merge(review_counts[['app_name', 'review_score']], on='app_name', how='left')
)




In [None]:
# Mean review score within each user-experience tier (one row per tier)
average_scores = df.groupby('user_experience', as_index=False)['review_score'].mean()
print(average_scores)


On average, the review scores of experienced and veteran users are similar, whereas new users tend to give lower scores to the applications. We should not focus too much on the numbers themselves, because they are affected by our choice of the playtime thresholds that separate one category from the next. Instead, we should focus on the clear difference in rating between new users and more experienced ones, which points to a trend in which more experienced players tend to give higher scores.
We try to suggest a couple of possible reasons why this happens:
- Beginners have higher expectations and less knowledge of the game; unmet expectations can lead to dissatisfaction with the game, which in turn can lead to negative reviews that lower the average score.
- Usually, those who enjoy the experience tend to spend more time playing and leave favorable reviews.

 

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Density histogram (with KDE) of review_score, one curve per experience tier.
# common_norm=False normalises each tier separately, so tiers with very
# different row counts can still be compared by shape.
fig, ax = plt.subplots(figsize=(12, 6))
sms.histplot(df, x='review_score', hue='user_experience', element='step', kde=True,
             palette={'New User': 'lightcoral', 'Experienced User': 'gold', 'Veteran User': 'mediumseagreen'},
             stat='density', common_norm=False, ax=ax)
ax.set_title('Distribution of Review Scores by User Experience Level')
ax.set_xlabel('Review Score')
ax.set_ylabel('Density')
# review_score is normalised to a 0-10 scale where it is defined, so the
# axis spans 0-10; a (0, 1) limit cuts off every score above 1.
ax.set_xlim(0, 10)
plt.show()
