In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px


# Read steam.csv (path relative to the working directory) into the `db` DataFrame.
db = pd.read_csv("steam.csv")

### Inspecting and preparing data

In [None]:
# Preview the first five rows of the dataset.
db.head(n=5)

In [None]:
# View information about the DataFrame, such as the index, column dtypes and non-null counts.
db.info()

In [None]:
# Compute summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
db.describe()

In [None]:
# Count the missing values in each column.
db.isna().sum()

### Change the owners column to avg_owners.

In [None]:
# db.info() above reports the `owners` column as object dtype: each value is a
# text range of the form "<low>-<high>".
# Split the range into its two bounds, convert them to integers, and store the
# midpoint (truncated to an integer) back in the column.
owner_bounds = db['owners'].str.split('-', expand=True).astype(int)
db['owners'] = owner_bounds.sum(axis=1).div(2).astype(int)

# The column now holds the midpoint of the range, so rename it accordingly.
db.rename(columns={'owners': 'avg_owners'}, inplace=True)



### Parse release_date and add a new column (years).

In [None]:
# Create a new `years` column that keeps only the year part of release_date.
release_dates = pd.DatetimeIndex(db['release_date'])
db['years'] = release_dates.year

# Confirm that the new column was added and check its dtype.
db.info()

### Calculate total profit

In [None]:
# Create total_profit as avg_owners multiplied by price.
# The multiplication is done on the full-precision price and only the product is
# converted to an integer: casting `price` to int first would turn 7.19 into 7
# and every price below 1 into 0, understating the total.
# The product is rounded (not truncated) so floating-point noise such as
# 71899.99999 does not lose a unit, and int64 is requested explicitly so large
# totals cannot overflow on platforms where `int` maps to a 32-bit integer.
db['total_profit'] = (db['avg_owners'] * db['price']).round().astype('int64')
db.head()

### Check the correlation heatmap

In [None]:
# Set the figure size used for the heatmap.
sns.set(rc={'figure.figsize': (16, 10)})

# Show the correlation heatmap.
# Only numeric columns can be correlated; the frame also holds text columns, and
# recent pandas versions raise an error instead of silently skipping them, so
# the numeric columns are selected explicitly before calling corr().
numeric_columns = db.select_dtypes(include='number')
sns.heatmap(numeric_columns.corr(), annot=True);

In [None]:
# Drop, in a single call, every column that is not useful for this project.
unused_columns = [
    'appid',
    'english',
    'developer',
    'publisher',
    'platforms',
    'required_age',
    'steamspy_tags',
    'achievements',
    'average_playtime',
    'median_playtime',
]
db.drop(columns=unused_columns, inplace=True)

# View the data that remains after dropping the columns.
db

# Questions:

## In the last 5 years, which games have the highest positive ratings?


In [None]:
# Order the games from the newest to the oldest release year, then keep only
# the rows whose year is 2014 or later.
sort_years = (
    db.sort_values(by="years", ascending=False)
    .loc[lambda games: games["years"] >= 2014]
)

# Among those games, take the ten rows with the highest positive_ratings.
sort_ratings = sort_years.sort_values(by="positive_ratings", ascending=False).iloc[:10]

# Bar chart of game name against positive ratings.
fig = px.bar(sort_ratings, x='name', y='positive_ratings', title='Top 10 games')
fig.show()

## In the last 5 years, which genres have the most owners?

In [None]:
# Show the five most frequent genre combinations among the filtered games.
sort_years['genres'].value_counts().head(n=5)

In [None]:
# The five genre combinations to compare (the most frequent ones reported by
# value_counts() on the filtered games).
labels = ['Action;Indie', 'Casual;Indie', 'Action;Adventure;Indie', 'Adventure;Indie', 'Action;Casual;Indie']

# Sum avg_owners for each combination, matching the `genres` value exactly.
# A substring test (str.contains) would also count every longer combination that
# merely contains the text -- e.g. each 'Action;Adventure;Indie' game would be
# counted again under 'Adventure;Indie' -- so the pie slices would overlap and
# the same owners would be counted more than once.
owners_by_genre = (
    sort_years[sort_years['genres'].isin(labels)]
    .groupby('genres')['avg_owners']
    .sum()
    # Keep the totals in the same order as `labels`; a combination with no
    # matching rows gets 0 instead of going missing.
    .reindex(labels, fill_value=0)
)
data = owners_by_genre.tolist()

# Visualize as a pie chart.
fig = px.pie(values=data, names=labels, title='Most genres in the last 5 years')
fig.show()

## In the last 5 years, does being Single-Player or Multi-Player affect a game's number of owners?

In [None]:
# Boolean masks over the filtered games, based on the text of the `categories` column.
# regex=False makes the search a plain substring test; na=False treats a missing
# `categories` value as "no match" so the masks are always strictly boolean.
Single_Player = sort_years['categories'].str.contains('Single-player', regex=False, na=False)
Multi_Player = sort_years['categories'].str.contains('Multi-player', regex=False, na=False)

# Games that list BOTH modes. The second positional argument of str.contains is
# `case`, not another pattern, so the two masks have to be combined with `&`.
S_M = Single_Player & Multi_Player

# Sum avg_owners over three disjoint groups (single-player only, multi-player
# only, both) so that no game is counted in more than one pie slice.
Single_Player_owners = sort_years[Single_Player & ~Multi_Player].avg_owners.sum()
Multi_Player_owners = sort_years[Multi_Player & ~Single_Player].avg_owners.sum()
S_M_O = sort_years[S_M].avg_owners.sum()

# Visualize as a pie chart.
data = [Single_Player_owners, Multi_Player_owners, S_M_O]
labels = ['Single Player', 'Multi Player', 'Both']

fig = px.pie(values=data, names=labels, title='Single Player - Multi Player')
fig.show()

## In the last 5 years, does being free or paid affect a game's number of owners?

In [None]:
# Split the filtered games by price: above zero counts as paid, exactly zero as free.
game_prices = sort_years['price']
paid = sort_years[game_prices > 0.0]
free = sort_years[game_prices == 0.0]

# Total avg_owners within each group.
paid_owners = paid['avg_owners'].sum()
free_owners = free['avg_owners'].sum()

# Bar chart comparing the two totals.
labels = ['Paid', 'Free']
data = [paid_owners, free_owners]

fig = px.bar(x=labels, y=data, title='Owners')
fig.show()


## In the last 5 years, what is the total profit?

In [None]:
# Bar chart of total_profit for the games in sort_ratings
# (the ten rows with the highest positive_ratings among the filtered games).
fig = px.bar(data_frame=sort_ratings, x='name', y='total_profit')
fig.show()