# 0. Installing dependencies

In [169]:
# Importing libraries
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import torch

%matplotlib inline

# 1. Loading data

In [170]:
# Loading downloaded FIDE data into pandas
csvPath = r'C:\Users\deshi\Code\cemetery-of-culture\year-3\neural-networks\2-loading-a-dataset-into-tensors\Chess FIDE Rankings.csv'

df = pd.read_csv(csvPath)

In [171]:
# Printing the dataset
df

Unnamed: 0,rank,name,ELO,title,federation,games,birth_year
0,1,Magnus Carlsen,2864,GM,Norway,3125,1990
1,2,Liren Ding,2806,GM,China,1630,1992
2,3,Alireza Firouzja,2793,GM,France,845,2003
3,4,Fabiano Caruana,2783,GM,United States,2622,1992
4,5,Levon Aronian,2775,GM,United States,3320,1982
...,...,...,...,...,...,...,...
195,196,Boris Alterman,2608,GM,Israel,697,1970
196,197,Chithambaram VR Aravindh,2608,GM,India,535,1999
197,198,Jonas Buhl Bjerre,2608,GM,Denmark,411,2004
198,199,Surya Shekhar Ganguly,2608,GM,India,1811,1983


# 2. Data cleaning

In [172]:
# Print data types of all the cols
df.dtypes

rank            int64
name           object
ELO             int64
title          object
federation     object
games           int64
birth_year      int64
dtype: object

In [173]:
# Updating the column names
newColList = [
    'Rank',
    'Name',
    'Rating',
    'Title',
    'Federation',
    'Games Played',
    'Year of Birth'
]

df.columns = newColList

In [174]:
# Columns of the dataset as a list
df.columns.to_list()

['Rank',
 'Name',
 'Rating',
 'Title',
 'Federation',
 'Games Played',
 'Year of Birth']

In [175]:
df = df.set_index('Rank')

In [176]:
df

Unnamed: 0_level_0,Name,Rating,Title,Federation,Games Played,Year of Birth
Rank,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1,Magnus Carlsen,2864,GM,Norway,3125,1990
2,Liren Ding,2806,GM,China,1630,1992
3,Alireza Firouzja,2793,GM,France,845,2003
4,Fabiano Caruana,2783,GM,United States,2622,1992
5,Levon Aronian,2775,GM,United States,3320,1982
...,...,...,...,...,...,...
196,Boris Alterman,2608,GM,Israel,697,1970
197,Chithambaram VR Aravindh,2608,GM,India,535,1999
198,Jonas Buhl Bjerre,2608,GM,Denmark,411,2004
199,Surya Shekhar Ganguly,2608,GM,India,1811,1983


# 3. Exploring the data

In [177]:
# Check the shape of data
df.shape

(200, 6)

In [178]:
df.describe()

Unnamed: 0,Rating,Games Played,Year of Birth
count,200.0,200.0,200.0
mean,2662.385,1394.185,1989.395
std,47.208793,776.177129,9.057548
min,2608.0,277.0,1965.0
25%,2623.75,785.0,1984.0
50%,2650.5,1211.0,1990.0
75%,2686.0,1784.0,1996.0
max,2864.0,3913.0,2006.0


In [179]:
# Easier command to display the list of columns
list(df)

['Name', 'Rating', 'Title', 'Federation', 'Games Played', 'Year of Birth']

In [180]:
# Stats for games played
df['Games Played'].describe()

count     200.000000
mean     1394.185000
std       776.177129
min       277.000000
25%       785.000000
50%      1211.000000
75%      1784.000000
max      3913.000000
Name: Games Played, dtype: float64

In [181]:
# Stats for rating points
df['Rating'].describe()

count     200.000000
mean     2662.385000
std        47.208793
min      2608.000000
25%      2623.750000
50%      2650.500000
75%      2686.000000
max      2864.000000
Name: Rating, dtype: float64

In [182]:
# All the top chess players with a rating of more than 2700
df[
    df['Rating'] > 2700
]

Unnamed: 0_level_0,Name,Rating,Title,Federation,Games Played,Year of Birth
Rank,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1,Magnus Carlsen,2864,GM,Norway,3125,1990
2,Liren Ding,2806,GM,China,1630,1992
3,Alireza Firouzja,2793,GM,France,845,2003
4,Fabiano Caruana,2783,GM,United States,2622,1992
5,Levon Aronian,2775,GM,United States,3320,1982
6,Wesley So,2773,GM,United States,1902,1993
7,Ian Nepomniachtchi,2766,GM,Russian Federation,2297,1990
8,Richard Rapport,2764,GM,Hungary,1278,1996
9,Anish Giri,2760,GM,Netherlands,2209,1994
10,Hikaru Nakamura,2760,GM,United States,3147,1987


In [196]:
# Find all the players born in the 00s
df[
    df['Year of Birth'] >= 2000
].sort_values(
    by=['Year of Birth'], 
    ascending=False
)

Unnamed: 0_level_0,Name,Rating,Title,Federation,Games Played,Year of Birth
Rank,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
53,Gukesh D,2684,GM,India,569,2006
187,Raunak Sadhwani,2611,GM,India,511,2005
144,Javokhir Sindarov,2629,GM,Uzbekistan,389,2005
107,Praggnanandhaa R,2648,GM,India,496,2005
198,Jonas Buhl Bjerre,2608,GM,Denmark,411,2004
100,Nihal Sarin,2651,GM,India,676,2004
45,Nodirbek Abdusattorov,2688,GM,Uzbekistan,570,2004
50,Vincent Keymer,2686,GM,Germany,392,2004
148,Awonder Liang,2625,GM,United States,429,2003
3,Alireza Firouzja,2793,GM,France,845,2003


In [183]:
# Finding out highest rated player born in the 90s
df[
    (df['Rating'] > 2700) & (df['Year of Birth'] > 1990)
]

Unnamed: 0_level_0,Name,Rating,Title,Federation,Games Played,Year of Birth
Rank,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2,Liren Ding,2806,GM,China,1630,1992
3,Alireza Firouzja,2793,GM,France,845,2003
4,Fabiano Caruana,2783,GM,United States,2622,1992
6,Wesley So,2773,GM,United States,1902,1993
8,Richard Rapport,2764,GM,Hungary,1278,1996
9,Anish Giri,2760,GM,Netherlands,2209,1994
15,Jan Krzysztof Duda,2750,GM,Poland,1213,1998
22,Yi Wei,2727,GM,China,979,1999
23,Quang Liem Le,2722,GM,Viet Nam,1725,1991
26,Sam Shankland,2720,GM,United States,1007,1991


In [193]:
# Finding out highest rated player with the least amount of games
gamesSort = df.sort_values(
    by=['Games Played'],
    ascending=True
)

gamesSort.head()

Unnamed: 0_level_0,Name,Rating,Title,Federation,Games Played,Year of Birth
Rank,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
188,Aryan Chopra,2610,GM,India,277,2001
157,Cristobal Henriquez Villagra,2620,GM,Chile,320,1996
161,Nodirbek Yakubboev,2620,GM,Uzbekistan,364,2002
84,Qun Ma,2666,GM,China,366,1991
144,Javokhir Sindarov,2629,GM,Uzbekistan,389,2005


In [185]:
df['Federation'].value_counts()

Russian Federation       22
United States            19
India                    17
Ukraine                  13
FIDE                     12
China                    12
Germany                   9
Armenia                   9
France                    7
Azerbaijan                7
Spain                     6
Netherlands               6
United Kingdom            5
Poland                    5
Hungary                   5
Israel                    4
Uzbekistan                4
Czech Republic            3
Norway                    3
Iran                      3
Romania                   2
Viet Nam                  2
Croatia                   2
Bulgaria                  2
Argentina                 2
Serbia                    2
Tajikistan                1
Denmark                   1
Uruguay                   1
Paraguay                  1
Italy                     1
Kazakhstan                1
Chile                     1
Turkey                    1
Canada                    1
Slovenia            