In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd 
from scipy import stats
import math
import multiprocessing 
import warnings
from functools import partial
from time import sleep
import datatable as dt

In [None]:
# Read the source CSV (path is relative to the working directory) into the main frame.
TWIC_CSV_PATH = "twic_master.csv"
df = pd.read_csv(TWIC_CSV_PATH)

In [None]:
# Columns removed from the frame before any of the aggregations below.
columns_to_drop = [
    'Variation', 'twic_number', 'EventDate', 'mainline_moves', 'Site',
    'Round', 'ECO',
    'WhiteFideId', 'BlackFideId',
    'WhiteTeam', 'BlackTeam',
    'WhiteTitle', 'BlackTitle',
    'EventType', 'FEN', 'SetUp', 'Variant', 'Board', 'PlyCount', 'EventCategory',
]

# Rebind to the trimmed frame; a KeyError is raised if any listed column is absent.
df = df.drop(columns=columns_to_drop)

In [None]:
def _year_prefix(date_value):
    """Return the first four characters of the value's string form."""
    return str(date_value)[:4]


# Derive a string 'Year' column from the leading characters of 'Date'.
df['Year'] = df['Date'].map(_year_prefix)

print(df['Year'].unique())

# Keep only rows whose year label is 2012 through 2022 inclusive;
# .copy() gives an independent frame rather than a view of the original.
selected_years = [str(year) for year in range(2012, 2023)]
df = df[df['Year'].isin(selected_years)].copy()

In [None]:
# Number of rows for every (Year, Opening) pair, as a flat table with a 'count' column.
opening_counts = (
    df.groupby(['Year', 'Opening'])['Opening']
    .count()
    .reset_index(name='count')
)

# Within each year, order openings from most to least frequent, then keep the first three.
ranked_openings = opening_counts.sort_values(['Year', 'count'], ascending=[True, False])
viz1 = ranked_openings.groupby('Year').head(3)

# Keep year labels that compare <= '2022' (string comparison on the 'Year' column).
viz1 = viz1[viz1['Year'] <= '2022']

# viz1.to_csv("viz1.csv", header=True, index=False)

In [None]:
# Horizontal grouped bar chart of the three most frequent openings per year (from viz1),
# saved to a PNG in the working directory.
plt.figure(figsize=(15,10))
sns.set_style("whitegrid")
plt.title("Sicilian is the Most Played Chess Opening Variation in Tournaments Since 2012",size=20)

# Hand-picked colour for each opening expected in the chart.
palette={'Sicilian':'#448AFF', 
         'French':'#455A64', 
         'King\'s Indian':'#607D8B', 
         'Queen\'s pawn game':'#CFD8DC'}

# The palette was previously defined but never handed to seaborn, so the default
# colours were used. Apply it only when it has an entry for every opening present:
# a dict palette that is missing a hue level makes seaborn raise, so fall back to
# the default colours (palette=None) in that case.
hue_levels = viz1['Opening'].unique()
bar_palette = palette if all(level in palette for level in hue_levels) else None

viz1_plot = sns.barplot(data=viz1, x="count", y="Year", hue="Opening", palette=bar_palette)

# Annotate only the first hue group's bars with their counts; skip when nothing was drawn.
if viz1_plot.containers:
    viz1_plot.bar_label(viz1_plot.containers[0],size=14)

plt.ylabel("Years",size = 20)
plt.tick_params(axis='y',labelsize=15)

plt.xlabel("Number of Matches Played", size = 20)
plt.tick_params(axis='x',labelsize=15)

fig = viz1_plot.get_figure()
fig.savefig("DMV_Q1_MostPlayedOpening.png")

In [None]:
# Count rows for every (Year, Online) combination; the result is a Series
# named 'Online' indexed by those two keys.
format_groups = df[['Year', 'Online']].groupby(['Year', 'Online'])
viz2 = format_groups['Online'].count()

In [None]:
# Flatten the grouped counts into a table: the index levels become columns
# and the counts land in a 'Match_Count' column.
viz2 = viz2.reset_index(name="Match_Count")

In [None]:
# Relabel the boolean 'Online' flag with readable names for the plot legend.
# The result is assigned back to the column: calling replace(..., inplace=True) on
# a column selection acts on an intermediate object, which pandas warns about and
# which leaves the frame unchanged once Copy-on-Write is active.
viz2['Online'] = viz2['Online'].replace({True: 'Online', False: 'Offline'})

In [None]:
# Show the viz2 table (Year, Online, Match_Count) as this cell's output.
viz2

In [None]:
# Line chart of match counts per year, one line per value of the 'Online' column.
sns.set_style("whitegrid")
_, format_ax = plt.subplots(figsize=(15, 10))

# Title, axis labels and tick sizes are set on the axes before seaborn draws onto them.
format_ax.set_title("Format of Games Played Over Years", size=20)
format_ax.set_xlabel("Years", size=20)
format_ax.tick_params(axis='x', labelsize=15)
format_ax.set_ylabel("Matches Played", size=20)
format_ax.tick_params(axis='y', labelsize=15)

sns.lineplot(data=viz2, x='Year', y='Match_Count', hue='Online', ax=format_ax)

# Rebuild the legend with a larger font in the upper-left corner.
format_ax.legend(loc='upper left', prop={'size': 20})

In [None]:
# Count rows per (Year, Result) pair. Note: viz3 is reassigned from the
# 'Winners' column a few cells further down.
result_columns = ['White', 'Black', 'Result', 'Event', 'Year']
viz3 = df[result_columns].groupby(['Year', 'Result'])['Result'].count()

In [None]:
# Translate each result string into a label naming the winning side (or a draw).
conditions = [
    df['Result'] == '1-0',
    df['Result'] == '0-1',
    df['Result'] == '1/2-1/2',
]
values = ['White_Pieces', 'Black_Pieces', 'Draw']

# Rows matching none of the conditions get the string sentinel '0', which a later
# cell filters out. The default must be given explicitly as a string: np.select's
# implicit default is the integer 0, which recent NumPy refuses to combine with
# string choices (TypeError) and older releases silently coerced to '0'.
df['Winners'] = np.select(conditions, values, default='0')

In [None]:
# Count rows for every (Year, Winners) combination; the result is a Series
# named 'Winners' indexed by those two keys.
winner_groups = df[['Year', 'Winners']].groupby(['Year', 'Winners'])
viz3 = winner_groups['Winners'].count()

In [None]:
# Flatten the grouped counts into a table: the index levels become columns
# and the counts land in a 'Match_Count' column.
viz3 = viz3.reset_index(name="Match_Count")

In [None]:
# Discard rows labelled with the '0' placeholder, i.e. results that matched
# none of the win/draw conditions.
is_labelled = viz3['Winners'] != '0'
viz3 = viz3.loc[is_labelled]

In [None]:
# Show the viz3 table (Year, Winners, Match_Count) as this cell's output.
viz3

In [None]:
# Bubble-style scatter of match counts per year, coloured by the 'Winners' label.
sns.set_style("whitegrid")
_, winners_ax = plt.subplots(figsize=(15, 10))

# Title, axis labels and tick sizes are set on the axes before seaborn draws onto them.
winners_ax.set_title("White Pieces have Higher Winning Rate over Years", size=20)
winners_ax.set_xlabel("Years", size=20)
winners_ax.tick_params(axis='x', labelsize=15)
winners_ax.set_ylabel("Number of Matches Won over Years", size=20)
winners_ax.tick_params(axis='y', labelsize=15)

# Large, half-transparent markers so overlapping points remain visible.
sns.scatterplot(
    data=viz3,
    x='Year',
    y='Match_Count',
    hue='Winners',
    s=1000,
    alpha=0.5,
    ax=winners_ax,
)

# Rebuild the legend with a larger font in the upper-left corner.
winners_ax.legend(loc='upper left', prop={'size': 20})