In [None]:
import os

# Print the name of every entry found in the data directory, one per line
fileLists = os.listdir('data')
for file in fileLists:
    print(file)

In [None]:
import pandas as pnd

# Lift the cap on the number of DataFrame columns shown when printing
pnd.options.display.max_columns = None

# Read the Pokédex CSV file into a DataFrame named ourPokemons
ourPokemons = pnd.read_csv('data/pokedex.csv')

# Columns of the DataFrame
# |---------------------------------------------------------------------------------------------------|
# | Column                    |            Meaning                                                    |
# |---------------------------------------------------------------------------------------------------|
# | NUMERO                    |   Id of the Pokémon                                                   |
# | NOM                       |   Pokémon's name                                                      |
# | TYPE_1                    |   Primary type (Grass, Fire, Steel, ...)                              |
# | TYPE_2                    |   Secondary type (Grass, Fire, Steel, ...)                            |
# | POINTS_DE_VIE             |   Hit points of the Pokémon                                           |
# | NIVEAU_ATTAQUE            |   The Pokémon's attack level                                          |
# | NIVEAU_DEFENSE            |   The Pokémon's defense level                                         |
# | NIVEAU_ATTAQUE_SPECEIALE  |   Pokémon's special attack level (name spelled as in the data)        |
# | NIVEAU_DEFENSE_SPECIALE   |   Pokémon's special defense level                                     |
# | VITESSE                   |   Pokémon's speed                                                     |
# | GENERATION                |   Generation number to which Pokémon belongs                          |
# | LEGENDAIRE                |   Is the Pokémon legendary? (turned into 0/1 later in the notebook)   |
# |---------------------------------------------------------------------------------------------------|
print(ourPokemons.columns.values)

# Show the first 10 rows of the DataFrame
print(ourPokemons.iloc[:10])


In [None]:
# Encode the LEGENDAIRE column as an integer flag: 1 = legendary, 0 = not legendary.
# The conversion is guarded by the column's current dtype so that running this
# cell a second time does not compare the already-encoded integers with 'VRAI'
# and silently reset every flag to 0.
legendaryFlags = ourPokemons['LEGENDAIRE']
if pnd.api.types.is_bool_dtype(legendaryFlags):
    # Column already holds real booleans: cast them directly
    ourPokemons['LEGENDAIRE'] = legendaryFlags.astype(int)
elif not pnd.api.types.is_integer_dtype(legendaryFlags):
    # Raw values: only the exact string 'VRAI' counts as legendary
    ourPokemons['LEGENDAIRE'] = (legendaryFlags == 'VRAI').astype(int)

print(ourPokemons['LEGENDAIRE'].head(100))

# Number of observations (rows) and features (columns)
print(ourPokemons.shape)

# Summary of the dataset. DataFrame.info() writes its report itself and
# returns None, so it is not wrapped in print() (that would add a stray "None").
ourPokemons.info()

# Rows whose name (NOM) is missing
print(ourPokemons[ourPokemons['NOM'].isnull()])

In [None]:
# Load the combats.csv file into a DataFrame named fighting
fighting = pnd.read_csv('data/combats.csv')

# Feature (column) names
print(fighting.columns.values)

# First 10 rows of the DataFrame
print(fighting.head(10))

# Number of rows and number of columns
print(fighting.shape)

# Summary of the dataset. DataFrame.info() writes its report itself and
# returns None, so it is not wrapped in print() (that would add a stray "None").
fighting.info()

In [None]:
# groupby(<column>).count() moves the grouping column into the index and fills
# every remaining column with the number of non-null rows of each group.
# That is why, within one of these frames, the remaining columns (for example
# POKEMON_SECOND and GAGNANT when grouping by POKEMON_PREMIER) show the same
# value on a given row as long as the data has no missing entries.
nbPokemonOnFirstPosition, nbPokemonOnSecondPosition, nbWin = (
    fighting.groupby(groupingColumn).count()
    for groupingColumn in ('POKEMON_PREMIER', 'POKEMON_SECOND', 'GAGNANT')
)

# Element-wise sum of the two position counts, aligned on index and columns;
# a column that exists in only one of the two frames comes out as NaN.
nbTotalFightPerPokemon = nbPokemonOnFirstPosition.add(nbPokemonOnSecondPosition)

print(nbWin)

In [None]:
# Base frame for the aggregation: one row per Pokémon number found in the
# GAGNANT column, i.e. every Pokémon that won at least one fight.
listToBeAggregated = fighting.groupby('GAGNANT').count()
# sort_index() returns a new frame, so the result is kept; otherwise the call
# has no effect.
listToBeAggregated = listToBeAggregated.sort_index()

# Number of fights = appearances in first position + appearances in second position.
# fill_value=0 keeps a valid total for a Pokémon that only ever appeared in one
# of the two positions (a plain "+" would produce NaN for it).
listToBeAggregated['NBR_COMBATS'] = nbPokemonOnFirstPosition.GAGNANT.add(
    nbPokemonOnSecondPosition.GAGNANT, fill_value=0
)

# Number of wins
listToBeAggregated['NBR_VICTOIRES'] = nbWin.POKEMON_PREMIER

# Share of fights won: wins divided by fights (a ratio, not multiplied by 100,
# despite the column name)
listToBeAggregated['POURCENTAGE_DE_VICTOIRES'] = (
    listToBeAggregated['NBR_VICTOIRES'] / listToBeAggregated['NBR_COMBATS']
)

# Left merge on the Pokémon number: every Pokédex row is kept, and a Pokémon
# that never appears as a winner gets NaN in the aggregated columns.
newPokedex = ourPokemons.merge(listToBeAggregated, left_on='NUMERO', right_index=True, how='left')

print(newPokedex)

In [9]:
# Standard deviation ("écart-type" in French; the `std` row of describe()).
# It is a statistical value that shows how the data are spread around the mean:
# the smaller the value, the closer the data are to the mean; the larger the
# value, the further the data lie from it.
descriptiveStats = newPokedex.describe()
print(descriptiveStats)

         NUMERO  POINTS_DE_VIE  NIVEAU_ATTAQUE  NIVEAU_DEFENSE  \
count  800.0000     800.000000      800.000000      800.000000   
mean   400.5000      69.258750       79.001250       73.842500   
std    231.0844      25.534669       32.457366       31.183501   
min      1.0000       1.000000        5.000000        5.000000   
25%    200.7500      50.000000       55.000000       50.000000   
50%    400.5000      65.000000       75.000000       70.000000   
75%    600.2500      80.000000      100.000000       90.000000   
max    800.0000     255.000000      190.000000      230.000000   

        NIVEAU_ATTAQUE_SPECEIALE  NIVEAU_DEFENSE_SPECIALE     VITESSE  \
count                 800.000000               800.000000  800.000000   
mean                   72.820000                71.902500   68.277500   
std                    32.722294                27.828916   29.060474   
min                    10.000000                20.000000    5.000000   
25%                    49.750000        