# Most Played Steam Games

In this project we examine the most played games on **Steam**, saving the information into a CSV file and illustrating the process step by step.

## 1- Getting the data from 'https://steamdb.info/graph/' 

In [None]:
from bs4 import BeautifulSoup
import requests
from urllib.request import Request, urlopen

# Build the request for the SteamDB charts page with an explicit User-Agent
# header (NOTE(review): presumably the site rejects urllib's default
# User-Agent -- confirm).
req = Request(
    url='https://steamdb.info/graph/',
    headers={'User-Agent': 'XYZ/3.0'},
)

# Use the response as a context manager so the HTTP connection is closed
# as soon as the body has been read, even if read() raises.
with urlopen(req, timeout=10) as response:
    webpage = response.read()

soup1 = BeautifulSoup(webpage, 'html.parser')

# Select every element carrying the CSS class "app" and split its text on
# whitespace, giving one list of tokens per matched element.
class_ext = soup1.select('.app')
l = [item.getText().split() for item in class_ext]

# The first matched element is skipped (presumably a header row -- verify
# against the page markup).
information = l[1:]
# There is a lot of information, so only the first 10 elements are printed.
print(information[:10])

In [None]:
# Each scraped row is a list of whitespace-separated tokens. The last four
# tokens are kept as separate fields; everything before them is joined back
# with single spaces to form the first field (the name).
new_information_most_played = [
    [" ".join(tokens[:-4]), *tokens[-4:]]
    for tokens in information
]
# There is a lot of information, so only the first 10 rows are displayed.
new_information_most_played[:10]

## 2- Let's make a DataFrame.

In [None]:
import pandas as pd
# Turn the reshaped rows into a table: the name first, then the four
# trailing fields of each scraped row under their column labels.
df_most_played = pd.DataFrame(
    data=new_information_most_played,
    columns=["Name", "Current", "Peak_24h", "All time Peak", "Plus"],
)
df_most_played

## 3- Drop "Plus" column

In [None]:
# Remove the "Plus" column from the existing DataFrame object (in place),
# then display the result.
df_most_played.drop(columns=["Plus"], inplace=True)
df_most_played

## 4- Analyzing the information

### Checking the type of each column

In [None]:
# Print the DataFrame summary (columns, non-null counts and dtypes) so the
# column types can be checked before any conversion.
df_most_played.info()

Let's remove the "," character from the "Current", "Peak_24h" and "All time Peak" columns so that their values can be converted to numeric types.

In [None]:
# Remove the "," characters from each count column; the values stay strings
# at this point.
for count_column in ("Current", "Peak_24h", "All time Peak"):
    df_most_played[count_column] = df_most_played[count_column].str.replace(",", "")

### Transforming the type of "Current", "Peak_24h" and "All time Peak" columns to integer

In [None]:
# Cast each count column to integers, one column at a time, then print the
# DataFrame summary again to show the new dtypes.
for count_column in ("Current", "Peak_24h", "All time Peak"):
    df_most_played[count_column] = df_most_played[count_column].astype(int)
df_most_played.info()

In [None]:
# Display descriptive statistics for the DataFrame's columns.
df_most_played.describe()

## 5- Saving the dataframe into a csv file

In [None]:
# Write the table to a CSV file in the current working directory.
# `index` is documented as a bool: pass False explicitly to omit the row
# index instead of relying on None being falsy.
df_most_played.to_csv("MOST_PLAYED_GAMES_IN_STEAM.csv", index=False)

In [None]:
# Order all games by their all-time peak, highest first.
df_most_played_most_all_time_peak = df_most_played.sort_values(
    by="All time Peak", ascending=False
)
# Keep the first ten rows of that ranking, restricted to the name and the
# ranking metric.
df_most_played_most_all_time_peak_10 = (
    df_most_played_most_all_time_peak.head(10)[["Name", "All time Peak"]]
)
df_most_played_most_all_time_peak_10

## Let's build some charts to illustrate the data

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Horizontal bar chart of the ten rows in
# df_most_played_most_all_time_peak_10, with hand-placed title and labels.
fig, ax = plt.subplots(figsize=(4.5, 5), dpi=100)

# The title is drawn as free text at hard-coded data coordinates.
ax.text(x=-1500000, y=-2, s='Top 10 of Most Played Games on Steam',
        weight='bold', size=15, color="#040045")

# Pass `ax` explicitly so the bars are drawn on the axes styled below rather
# than on whatever matplotlib considers the current axes.
sns.barplot(x='All time Peak', y='Name',
            data=df_most_played_most_all_time_peak_10,
            palette='Spectral_r', ax=ax)

# Strip the frame, axis labels and tick marks; keep grey x tick labels on top.
for location in ["left", "right", "bottom", "top"]:
    ax.spines[location].set_visible(False)
ax.set(xlabel=None, ylabel=None)
ax.xaxis.tick_top()
ax.tick_params(top=False, left=False)
ax.tick_params(axis='x', colors='grey')
ax.set_yticks([])

# The default y tick labels were removed above; write each game name by hand
# at a hard-coded x offset. enumerate() labels every row rather than capping
# the count at an arbitrary constant.
names_models = df_most_played_most_all_time_peak_10["Name"]
for i, models in enumerate(names_models):
    ax.text(x=-2600000, y=i + 0.12, s=models, fontweight="bold",
            color="#005566", alpha=1)

# Vertical reference line at 1,000,000 on the x axis.
ax.axvline(x=1000000, ymin=0, ymax=1, c='grey', alpha=0.5)
ax.set_facecolor('#FFFFFF')
fig.patch.set_facecolor('w')
plt.show()

In [None]:
# Order all games by their 24-hour peak, highest first.
df_most_played_most_all_time_peak_Peak_24h = df_most_played.sort_values(
    by="Peak_24h", ascending=False
)
# Keep the first ten rows of that ranking, restricted to the name and the
# ranking metric.
df_most_played_most_all_time_peak_Peak_24h_10 = (
    df_most_played_most_all_time_peak_Peak_24h.head(10)[["Name", "Peak_24h"]]
)
df_most_played_most_all_time_peak_Peak_24h_10

In [None]:
# Horizontal bar chart of the ten rows in
# df_most_played_most_all_time_peak_Peak_24h_10, with hand-placed title and
# labels.
fig, ax = plt.subplots(figsize=(4.5, 5), dpi=100)

# The title is drawn as free text at hard-coded data coordinates.
ax.text(x=-700000, y=-2,
        s='Top 10 of Most Played Games on Steam in the last 24h',
        weight='bold', size=15, color="#040045")

# Pass `ax` explicitly so the bars are drawn on the axes styled below rather
# than on whatever matplotlib considers the current axes.
sns.barplot(x='Peak_24h', y='Name',
            data=df_most_played_most_all_time_peak_Peak_24h_10,
            palette='Spectral_r', ax=ax)

# Strip the frame, axis labels and tick marks; keep three grey x tick labels
# on top.
for location in ["left", "right", "bottom", "top"]:
    ax.spines[location].set_visible(False)
ax.set(xlabel=None, ylabel=None)
ax.xaxis.tick_top()
ax.set_xticks([0, 400000, 800000])
ax.tick_params(top=False, left=False)
ax.tick_params(axis='x', colors='grey')
ax.set_yticks([])

# The default y tick labels were removed above; write each game name by hand
# at a hard-coded x offset. enumerate() labels every row rather than capping
# the count at an arbitrary constant.
names_models = df_most_played_most_all_time_peak_Peak_24h_10["Name"]
for i, models in enumerate(names_models):
    ax.text(x=-800000, y=i + 0.12, s=models, fontweight="bold",
            color="#005566", alpha=1)

# Vertical reference line at 400,000 on the x axis.
ax.axvline(x=400000, ymin=0, ymax=1, c='grey', alpha=0.5)
ax.set_facecolor('#FFFFFF')
fig.patch.set_facecolor('w')
plt.show()

In [None]:
# Order all games by their current player count, highest first.
df_most_played_most_all_time_peak_Peak_current = df_most_played.sort_values(
    by="Current", ascending=False
)
# Keep the first ten rows of that ranking, restricted to the name and the
# ranking metric.
df_most_played_most_all_time_peak_Peak_current_10 = (
    df_most_played_most_all_time_peak_Peak_current.head(10)[["Name", "Current"]]
)
df_most_played_most_all_time_peak_Peak_current_10

In [None]:
# Horizontal bar chart of the ten rows in
# df_most_played_most_all_time_peak_Peak_current_10, with hand-placed title
# and labels.
fig, ax = plt.subplots(figsize=(4.5, 5), dpi=100)

# The title is drawn as free text at hard-coded data coordinates.
ax.text(x=-700000, y=-2,
        s='Top 10 of Most Played Games on Steam in the moment',
        weight='bold', size=15, color="#040045")

# Pass `ax` explicitly so the bars are drawn on the axes styled below rather
# than on whatever matplotlib considers the current axes.
sns.barplot(x='Current', y='Name',
            data=df_most_played_most_all_time_peak_Peak_current_10,
            palette='Spectral_r', ax=ax)

# Strip the frame, axis labels and tick marks; keep three grey x tick labels
# on top.
for location in ["left", "right", "bottom", "top"]:
    ax.spines[location].set_visible(False)
ax.set(xlabel=None, ylabel=None)
ax.xaxis.tick_top()
ax.set_xticks([0, 400000, 800000])
ax.tick_params(top=False, left=False)
ax.tick_params(axis='x', colors='grey')
ax.set_yticks([])

# The default y tick labels were removed above; write each game name by hand
# at a hard-coded x offset. enumerate() labels every row rather than capping
# the count at an arbitrary constant.
names_models = df_most_played_most_all_time_peak_Peak_current_10["Name"]
for i, models in enumerate(names_models):
    ax.text(x=-650000, y=i + 0.12, s=models, fontweight="bold",
            color="#005566", alpha=1)

# Vertical reference line at 400,000 on the x axis.
ax.axvline(x=400000, ymin=0, ymax=1, c='grey', alpha=0.5)
ax.set_facecolor('#FFFFFF')
fig.patch.set_facecolor('w')
plt.show()