## Data Staging process for Phase 2 of CSI4142 Project

### 1. Extraction

In [None]:
# pip install and imports here
import pandas as pd

In [None]:
# Read base dataset in a pandas data frame and print
base_pokemon_df = pd.read_csv("data/base_df_pokemon.csv")
print(base_pokemon_df)

In [None]:
print("Now we verify the types pandas assigned to our columns")
print(base_pokemon_df.dtypes)
print("dtype = object signifies a string")

### 2. Transformation

In [None]:
#pip install and imports here

In [None]:
#create needed columns for transformation
print("Creating DexEntry Key column...")
base_pokemon_df["dexEntry key"] =  base_pokemon_df["id"].astype(str)+base_pokemon_df["generation"].str.replace("generation","")

print("Creating ContextInfo key column...")
base_pokemon_df["contextInfo key"] = base_pokemon_df["rank"]+base_pokemon_df["generation"].str.replace("generation","")
#print(base_pokemon_df)

We create dataframes that match our model's dimensions

In [None]:
print("Pokedex Entry dimension\n")

pkd_entry_dim_cols = ["dexEntry key","id","name","evolves_from","type1","type2","height","weight","abilities"]
pkd_entry_df = base_pokemon_df[pkd_entry_dim_cols]

print("Renaming the columns to match our model...")
pkd_entry_cols_names = {"id": "pokedex Id", "height":"height(cm)", "weight" : "weight(kg)"}
pkd_entry_df= pkd_entry_df.rename(columns=pkd_entry_cols_names, errors="raise")

print("Converting height and weight from decameter and dekagram to centimeters and kilogram...")
pkd_entry_df["height(cm)"] *= 10
pkd_entry_df["weight(kg)"] *= 0.1

print("\n")
print(pkd_entry_df)

In [None]:
print("ContextInfo dimension\n")
cinfo_dim_cols = ["contextInfo key","rank","generation"]
cinfo_df = base_pokemon_df[cinfo_dim_cols]

print("Dropping duplicates...")
cinfo_df.drop_duplicates(subset=["contextInfo key"],inplace=True)

print("Reseting indexes after removing duplicates...")
cinfo_df.reset_index(drop=True,inplace=True)
print(cinfo_df)

We create a dataframe for our fact table

In [None]:
print("Fact table\n")
fact_table_cols = ["dexEntry key","contextInfo key","hp", "atk", "def", "spatk","spdef","speed","total"]
fact_table_df = base_pokemon_df[fact_table_cols]

print("Renaming columns to match our model...")
fact_cols_names = {"hp": "Health Points", "atk":"Attack", "def" : "Defense", "spatk":"Special Attack","spdef":"Special Defense", "speed": "Speed", "total":"Total"}
fact_table_df= fact_table_df.rename(columns=fact_cols_names, errors="raise")

print("Checking for null values...")
print(fact_table_df.isnull().sum())

print("\n")
print(fact_table_df)

In [None]:
# If we want to add data from the datasets in data\extended datasets we can do that here

### 3. Loading

Loading the data to our DBMS 

In [32]:
#pip install and imports here
%pip install psycopg2
import psycopg2
import configparser

Note: you may need to restart the kernel to use updated packages.




In [38]:
print("Retrieving configuration...")
config = configparser.ConfigParser()
config.read('settings.ini')
db_config = config['DB CONFIGURATION']

print("Connecting to the database...")

try:
    conn = psycopg2.connect(
        dbname = db_config['DB_NAME'],
        host = db_config['HOST'],
        user = db_config['USER'],
        password = db_config['PASSWORD'],
        port = db_config['PORT']
    )
    cursor = conn.cursor()
except psycopg2.OperationalError as e:
    #print("Error connecting to the database!\n{0}").format(e)
    print("Failed to connect to the database")
    import sys
    err_type, err_obj, traceback = sys.exc_info()

    # get the line number when exception occured
    line_num = traceback.tb_lineno
    print ("\npsycopg2 ERROR:", e, "on line number:", line_num)
    print ("psycopg2 traceback:", traceback, "-- type:", err_type)

    # psycopg2 extensions.Diagnostics object attribute
    print ("\nextensions.Diagnostics:", e.diag)

    # print the pgcode and pgerror exceptions
    print ("pgerror:", e.pgerror)
    print ("pgcode:", e.pgcode, "\n")


Retrieving configuration...
Connecting to the database...
'postgres'
Failed to connect to the database

psycopg2 ERROR:  on line number: 9
psycopg2 traceback: <traceback object at 0x0000015166312280> -- type: <class 'psycopg2.OperationalError'>

extensions.Diagnostics: <psycopg2.extensions.Diagnostics object at 0x0000015167A35510>
pgerror: None
pgcode: None 

