In [1]:
# Dependencies and Setup
import csv
import os
from urllib.parse import quote_plus

import numpy as np
import pandas as pd
from sqlalchemy import inspect, text

## Load and Clean Data

In [2]:
# Files to Load
# Every input CSV lives in the "Resources" folder under the current working directory.
resources_dir = os.path.join(os.getcwd(), "Resources")

cont_code_path = os.path.join(resources_dir, "country-and-continent-codes-list.csv")
teams_path = os.path.join(resources_dir, "highest_earning_teams.csv")
players_path = os.path.join(resources_dir, "highest_earning_players.csv")
time_trends_path = os.path.join(resources_dir, "time_trends.csv")
trends_path = os.path.join(resources_dir, "trend_map.csv")

In [3]:
# Read Files and Store Into Pandas Data Frame
# Each path is read with pandas' default CSV settings, in the order listed;
# the frames are unpacked to the names in the matching positions.
(
    cont_code_df,
    teams_df,
    players_df,
    time_trends_df,
    trends_geo_df,
) = (
    pd.read_csv(csv_path)
    for csv_path in (
        cont_code_path,
        teams_path,
        players_path,
        time_trends_path,
        trends_path,
    )
)

In [4]:
## Look at our data
# Preview the first rows of the country/continent code lookup table.
cont_code_df.head()

Unnamed: 0,Continent_Name,Continent_Code,Country_Name,Two_Letter_Country_Code,Three_Letter_Country_Code,Country_Number
0,Asia,AS,"Afghanistan, Islamic Republic of",AF,AFG,4.0
1,Europe,EU,"Albania, Republic of",AL,ALB,8.0
2,Antarctica,AN,Antarctica (the territory South of 60 deg S),AQ,ATA,10.0
3,Africa,AF,"Algeria, People's Democratic Republic of",DZ,DZA,12.0
4,Oceania,OC,American Samoa,AS,ASM,16.0


In [5]:
# Preview the first rows of the highest-earning teams table.
teams_df.head()

Unnamed: 0,TeamId,TeamName,TotalUSDPrize,TotalTournaments,Game,Genre
0,760,San Francisco Shock,3105000.0,7,Overwatch,First-Person Shooter
1,776,London Spitfire,1591136.5,13,Overwatch,First-Person Shooter
2,768,New York Excelsior,1572618.5,18,Overwatch,First-Person Shooter
3,773,Philadelphia Fusion,1186278.5,15,Overwatch,First-Person Shooter
4,766,Seoul Dynasty,1130000.0,6,Overwatch,First-Person Shooter


In [6]:
# Preview the first rows of the highest-earning players table.
players_df.head()

Unnamed: 0,PlayerId,NameFirst,NameLast,CurrentHandle,CountryCode,TotalUSDPrize,Game,Genre
0,3883,Peter,Rasmussen,dupreeh,dk,1822989.41,Counter-Strike: Global Offensive,First-Person Shooter
1,3679,Andreas,Højsleth,Xyp9x,dk,1799288.57,Counter-Strike: Global Offensive,First-Person Shooter
2,3885,Nicolai,Reedtz,dev1ce,dk,1787489.88,Counter-Strike: Global Offensive,First-Person Shooter
3,3672,Lukas,Rossander,gla1ve,dk,1652350.75,Counter-Strike: Global Offensive,First-Person Shooter
4,17800,Emil,Reif,Magisk,dk,1416448.64,Counter-Strike: Global Offensive,First-Person Shooter


In [7]:
## Need to fix headers
# As loaded, the real column names sit in the row that follows label 0, so row 0
# is discarded and the next row is promoted to the header.
# The guard makes this cell safe to re-run: without it a second run fails with a
# KeyError because label 0 has already been dropped.
# (Assumes the freshly loaded frame has no column literally named "Week" -- confirm
# against the raw CSV if its layout changes.)
if "Week" not in time_trends_df.columns:
    time_trends_df = time_trends_df.drop([0])
    new_header = time_trends_df.iloc[0]
    time_trends_df = time_trends_df[1:]
    # A plain list keeps the promoted row's label (1) from becoming the columns' name.
    time_trends_df.columns = list(new_header)

# Replace the verbose "<game>: (Worldwide)" headers with short identifiers.
# NOTE: the values are left exactly as read (strings); no dtype conversion happens here.
time_trends_df = time_trends_df.rename(columns={"League of Legends: (Worldwide)" : "League_of_Legends", 
                                              "Dota 2: (Worldwide)" : "Dota_2",
                                              "Fortnite: (Worldwide)" : "Fortnite",
                                              "PlayerUnknown's Battlegrounds: (Worldwide)" : "PUBG",
                                              "Counter-Strike: Global Offensive: (Worldwide)" : "CSGO"
                                             })
time_trends_df.head()

1,Week,League_of_Legends,Dota_2,Fortnite,PUBG,CSGO
2,9/6/2020,26,5,74,76,11
3,9/13/2020,26,5,72,53,11
4,9/20/2020,27,5,72,45,11
5,9/27/2020,31,4,65,41,11
6,10/4/2020,30,4,57,39,10


In [8]:
## Need to fix headers
# As loaded, the real column names sit in the row that follows label 0, so row 0
# is discarded and the next row is promoted to the header.
# The guard makes this cell safe to re-run: without it a second run fails with a
# KeyError because label 0 has already been dropped.
# (Assumes the freshly loaded frame has no column literally named "Country" -- confirm
# against the raw CSV if its layout changes.)
if "Country" not in trends_geo_df.columns:
    trends_geo_df = trends_geo_df.drop([0])
    new_header = trends_geo_df.iloc[0]
    trends_geo_df = trends_geo_df[1:]
    # A plain list keeps the promoted row's label (1) from becoming the columns' name.
    trends_geo_df.columns = list(new_header)

# Replace the verbose "<game>: (9/1/20 - 9/1/21)" headers with short identifiers.
# NOTE: the values are left exactly as read (percentage strings / missing values).
trends_geo_df = trends_geo_df.rename(columns={"League of Legends: (9/1/20 - 9/1/21)" : "League_of_Legends", 
                                              "Dota 2: (9/1/20 - 9/1/21)" : "Dota_2",
                                              "Fortnite: (9/1/20 - 9/1/21)" : "Fortnite",
                                              "PlayerUnknown's Battlegrounds: (9/1/20 - 9/1/21)" : "PUBG",
                                              "Counter-Strike: Global Offensive: (9/1/20 - 9/1/21)" : "CSGO"
                                             })
trends_geo_df.head()

1,Country,League_of_Legends,Dota_2,Fortnite,PUBG,CSGO
2,Iraq,<1%,<1%,9%,91%,<1%
3,Bhutan,2%,1%,3%,94%,<1%
4,Myanmar (Burma),8%,11%,3%,75%,3%
5,Kiribati,,,,100%,
6,Libya,1%,<1%,7%,92%,<1%


## Create Database

In [9]:
## Dependencies
from sqlalchemy import create_engine
from hiddenConfig import password
import psycopg2

In [10]:
## Make sure the 'esports_db' database exists on the local PostgreSQL server.
## If it already exists, this cell just says so; otherwise it creates it. Then move on to the next cell.
database_name = 'esports_db'

conn = None
try:
    # Connect to the "postgres" database so the database list can be queried.
    conn = psycopg2.connect(
        database="postgres", user='postgres', password=password, host='127.0.0.1', port='5432'
    )
except psycopg2.Error as err:
    # Catch only database errors (not every exception) and show the reason,
    # so a failed connection is diagnosable instead of silently swallowed.
    print('Database not connected.')
    print(err)

if conn is not None:
    try:
        # PostgreSQL refuses to run CREATE DATABASE inside a transaction block,
        # so statements must be committed automatically.
        conn.autocommit = True

        # The cursor is closed automatically when the `with` block exits.
        with conn.cursor() as cursor:
            cursor.execute("SELECT datname FROM pg_database;")
            list_database = cursor.fetchall()

            # fetchall() yields one 1-tuple per database name.
            if (database_name,) in list_database:
                print(f"'{database_name}' Database already exists")
            else:
                # Preparing query to create a database
                sql = '''CREATE database esports_db'''

                # Creating a database
                cursor.execute(sql)
                print("Database created successfully!")
    finally:
        # Always release the connection, even if one of the queries above failed.
        # Kept inside the `conn is not None` branch so a failed connect no longer
        # ends in an AttributeError on None.
        conn.close()

'esports_db' Database already exists


In [11]:
## Connect to local database
# URL-encode the password so characters such as '@', ':' or '/' cannot be
# mistaken for URL delimiters when SQLAlchemy parses the connection string.
rds_connection_string = f"postgres:{quote_plus(str(password))}@localhost:5432/esports_db"
engine = create_engine(f'postgresql://{rds_connection_string}')

In [12]:
## Check to see if there are any tables already in the database
# Engine.table_names() is deprecated since SQLAlchemy 1.4 and removed in 2.0;
# the inspector returns the same list of table names.
inspect(engine).get_table_names()

['cont_code_data',
 'team_data',
 'trends_geo_data',
 'player_data',
 'time_trends_data']

#### Load the dataframes into our database

In [14]:
# Write each frame to its own table, in the order listed. An existing table of
# the same name is replaced, and the DataFrame index is not written as a column.
tables_to_load = {
    'cont_code_data': cont_code_df,
    'team_data': teams_df,
    'trends_geo_data': trends_geo_df,
    'player_data': players_df,
    'time_trends_data': time_trends_df,
}
for table_name, frame in tables_to_load.items():
    frame.to_sql(name=table_name, con=engine, if_exists='replace', index=False)

#### Verify that each table loaded correctly

In [15]:
# Fetch one row from cont_code_data to confirm the load worked.
# Engine.execute() with a raw string is deprecated in SQLAlchemy 1.4 and removed
# in 2.0, so use an explicit connection and text(); the connection is released
# when the `with` block exits.
with engine.connect() as connection:
    cont_code_first_row = connection.execute(text('select * from cont_code_data')).first()
cont_code_first_row

('Asia', 'AS', 'Afghanistan, Islamic Republic of', 'AF', 'AFG', 4.0)

In [16]:
# Fetch one row from team_data to confirm the load worked.
# Engine.execute() with a raw string is deprecated in SQLAlchemy 1.4 and removed
# in 2.0, so use an explicit connection and text(); the connection is released
# when the `with` block exits.
with engine.connect() as connection:
    team_first_row = connection.execute(text('select * from team_data')).first()
team_first_row

(760, 'San Francisco Shock', 3105000.0, 7, 'Overwatch', 'First-Person Shooter')

In [17]:
# Fetch one row from trends_geo_data to confirm the load worked.
# Engine.execute() with a raw string is deprecated in SQLAlchemy 1.4 and removed
# in 2.0, so use an explicit connection and text(); the connection is released
# when the `with` block exits.
with engine.connect() as connection:
    trends_geo_first_row = connection.execute(text('select * from trends_geo_data')).first()
trends_geo_first_row

('Iraq', '<1%', '<1%', '9%', '91%', '<1%')

In [18]:
# Fetch one row from player_data to confirm the load worked.
# Engine.execute() with a raw string is deprecated in SQLAlchemy 1.4 and removed
# in 2.0, so use an explicit connection and text(); the connection is released
# when the `with` block exits.
with engine.connect() as connection:
    player_first_row = connection.execute(text('select * from player_data')).first()
player_first_row

(3883, 'Peter', 'Rasmussen', 'dupreeh', 'dk', 1822989.41, 'Counter-Strike: Global Offensive', 'First-Person Shooter')

In [19]:
# Fetch one row from time_trends_data to confirm the load worked.
# Engine.execute() with a raw string is deprecated in SQLAlchemy 1.4 and removed
# in 2.0, so use an explicit connection and text(); the connection is released
# when the `with` block exits.
with engine.connect() as connection:
    time_trends_first_row = connection.execute(text('select * from time_trends_data')).first()
time_trends_first_row

('9/6/2020', '26', '5', '74', '76', '11')