<a href="https://colab.research.google.com/github/Integraloflnx/Pokemon_Project/blob/main/pokemon_regression.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Import Needed Packages**

In [None]:
# Data handling
import pandas as pd
import numpy as np
# Plotting
import matplotlib.pyplot as plt
import seaborn as sns
# pandas.plotting exposes no name "sca" (the original import raised ImportError);
# scatter_matrix is the pandas helper for pairwise scatterplots.
from pandas.plotting import scatter_matrix
# Formula-based regression fitting and Q-Q plots
from statsmodels.formula.api import ols
from statsmodels.api import qqplot

**Read in the Data**

In [None]:
#Columns kept for the analysis
attributes_list = ["weight_kg", "type1", "hp", "attack", "defense", "speed", "generation"]

#Read the data in from github exactly once; every pd.read_csv call on a URL is a separate download
url = "https://github.com/Integraloflnx/Pokemon_Project/blob/main/data/pokemon.csv?raw=true"
pokemon_raw = pd.read_csv(url, index_col = "name")

#Boolean mask (one entry per row) that is True for generation 1 and generation 2 pokemon
generation_boolean_condition = pokemon_raw["generation"].isin([1, 2])

#Create a dataframe with the selected rows and the specified attributes.
#.copy() makes pokemon independent of pokemon_raw, so adding or dropping columns later does not act on a slice of the raw frame
pokemon = pokemon_raw.loc[generation_boolean_condition, attributes_list].copy()

pokemon.tail()

Unnamed: 0_level_0,weight_kg,type1,hp,attack,defense,speed,generation
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Pupitar,152.0,rock,70,84,70,51,2
Tyranitar,202.0,rock,100,164,150,71,2
Lugia,216.0,psychic,106,90,130,110,2
Ho-Oh,199.0,fire,106,130,90,90,2
Celebi,5.0,psychic,100,100,100,100,2


**Handling Missing Values**

In [None]:
#Count the missing entries in every column before any cleaning
missing_before = pokemon.isna().sum()
print(missing_before)

#weight_kg is the response variable, so rows without it cannot be used; remove every row that contains a missing value
pokemon.dropna(inplace = True)

#Count again to confirm that no missing values are left
missing_after = pokemon.isna().sum()
print(missing_after)

weight_kg     18
type1          0
hp             0
attack         0
defense        0
speed          0
generation     0
dtype: int64
weight_kg     0
type1         0
hp            0
attack        0
defense       0
speed         0
generation    0
dtype: int64


**Determining the Shape of the Data**

In [None]:
#Report the size of the cleaned data set as a (rows, columns) tuple
rows_and_columns = pokemon.shape
print("(rows, columns): ", rows_and_columns)

(rows, columns):  (233, 7)


**Create an Indicator Variable for When a Pokemon is of Type Rock**

In [None]:
#Add the 0/1 indicator column is_rock: 1 when the pokemon's type1 is "rock", 0 otherwise
rock_mask = pokemon["type1"].eq("rock")
pokemon["is_rock"] = rock_mask.astype(int)
pokemon.tail()

Unnamed: 0_level_0,weight_kg,type1,hp,attack,defense,speed,generation,is_rock
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Pupitar,152.0,rock,70,84,70,51,2,1
Tyranitar,202.0,rock,100,164,150,71,2,1
Lugia,216.0,psychic,106,90,130,110,2,0
Ho-Oh,199.0,fire,106,130,90,90,2,0
Celebi,5.0,psychic,100,100,100,100,2,0


**Drop Unwanted Columns**

In [None]:
#Drop the type1 and generation columns.
#drop() returns a new frame and errors = "ignore" skips labels that are already gone,
#so running this cell a second time no longer raises a KeyError.
pokemon = pokemon.drop(columns = ["type1", "generation"], errors = "ignore")
pokemon.tail()

Unnamed: 0_level_0,weight_kg,hp,attack,defense,speed,is_rock
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Pupitar,152.0,70,84,70,51,1
Tyranitar,202.0,100,164,150,71,1
Lugia,216.0,106,90,130,110,0
Ho-Oh,199.0,106,130,90,90,0
Celebi,5.0,100,100,100,100,0


**Obtain Summary Statistics**

In [None]:
#We now obtain summary statistics for weight (the response variable).
#The aggregations are given as string names so pandas always uses its own Series methods
#(std is the sample standard deviation, ddof=1). Passing NumPy callables such as np.std to agg
#is deprecated in recent pandas versions and would eventually switch to NumPy's ddof=0 result.
#Note: the rows are labelled "max"/"min" here rather than the NumPy function names.
summary_statistics = pokemon["weight_kg"].agg(["mean", "median", "std", "max", "min"])
summary_statistics

mean       47.234335
median     29.000000
std        61.111724
amax      460.000000
amin        0.100000
Name: weight_kg, dtype: float64

In [None]:
#TODO: create scatterplots of the data (this cell does not contain any plotting code yet)


In [None]:
#Multiple linear regression of weight_kg on hp, attack, defense, speed and the is_rock indicator
model_formula = "weight_kg ~ hp + attack + defense + speed + is_rock"
poke = ols(model_formula, data = pokemon).fit()

#Display the estimated intercept and coefficients of the fitted model
poke.params

Intercept   -77.176052
hp            0.611674
attack        0.424037
defense       0.624291
speed         0.117778
is_rock      12.548724
dtype: float64