# Working With Types
## Casting Types & Missing Values

In [1]:
import pandas as pd
# Load the datasets from CSV files (paths are relative to the working directory).
# NOTE(review): only `titanic` is referenced in the cells visible in this section;
# confirm the other frames are used elsewhere before keeping these loads.
houses = pd.read_csv("data/kc_house_data.csv")
titanic = pd.read_csv("data/titanic.csv")
# This file is pipe-delimited; its first column is used as the row index.
netflix = pd.read_csv("data/netflix_titles.csv", sep="|", index_col=0)
btc = pd.read_csv("data/coin_Bitcoin.csv")
countries = pd.read_csv("data/world-happiness-report-2021.csv")

## Casting With astype()

In [2]:
# Inspect the column dtypes: note that `age` and `fare` are read as object, not as numbers.
titanic.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1309 entries, 0 to 1308
Data columns (total 14 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   pclass     1309 non-null   int64 
 1   survived   1309 non-null   int64 
 2   name       1309 non-null   object
 3   sex        1309 non-null   object
 4   age        1309 non-null   object
 5   sibsp      1309 non-null   int64 
 6   parch      1309 non-null   int64 
 7   ticket     1309 non-null   object
 8   fare       1309 non-null   object
 9   cabin      1309 non-null   object
 10  embarked   1309 non-null   object
 11  boat       1309 non-null   object
 12  body       1309 non-null   object
 13  home.dest  1309 non-null   object
dtypes: int64(4), object(10)
memory usage: 143.3+ KB


In [3]:
# Count the raw `age` values: the most frequent entry is the placeholder string '?'.
titanic["age"].value_counts()

?         263
24         47
22         43
21         41
30         40
         ... 
66          1
0.6667      1
76          1
67          1
26.5        1
Name: age, Length: 99, dtype: int64

In [4]:
# This gives us an error: astype("float") cannot parse the '?' placeholder.
# The exception is caught and printed so the demonstration stays visible
# while the notebook still survives Restart Kernel -> Run All.
try:
    titanic["age"].astype("float")
except ValueError as err:
    print(f"{type(err).__name__}: {err}")

ValueError: could not convert string to float: '?'

In [None]:
# Replace the '?' placeholder with None so the column can later be cast to float.
# The result is assigned back instead of using inplace=True on the selected column:
# a chained in-place call acts on a temporary Series under pandas Copy-on-Write
# (and emits a FutureWarning on pandas 2.x), which would leave `titanic` unchanged.
titanic["age"] = titanic["age"].replace(["?"], [None])

In [None]:
# Count the values again, this time including missing entries in the tally.
titanic["age"].value_counts(dropna=False)

In [None]:
# Retry the cast now that '?' has been replaced; the result is only displayed, not stored.
titanic["age"].astype("float")

In [None]:
# The cast above was not assigned, so check what dtype `age` has in the frame itself.
titanic.info()

In [None]:
# Store the float version in a new column, leaving the original `age` column as it is.
titanic["age_float"] = titanic["age"].astype("float")

In [None]:
# Display the frame to compare `age` with the new `age_float` column.
titanic

In [None]:
# Compare the dtypes of `age` and `age_float`.
titanic.info()

In [None]:
# Overwrite `age` with its float version; assigning back is what makes the cast persist.
titanic["age"] = titanic["age"].astype("float")

In [None]:
# Confirm the dtype of `age` after the assignment.
titanic.info()

In [None]:
# Numeric aggregation on `age` after the float cast; mean() skips missing values by default.
titanic["age"].mean()

In [None]:
# Preview `sex` as a categorical Series; the result is not stored.
titanic["sex"].astype("category")

In [None]:
# Persist the categorical conversion by assigning it back to the column.
titanic["sex"] = titanic["sex"].astype("category")

In [None]:
# Check the dtype of `sex` and the reported memory usage after the conversion.
titanic.info()

In [None]:
# Assigning a scalar replaces the whole column: every row becomes "MALE".
# NOTE(review): this discards the real `sex` data and the category dtype set earlier;
# presumably an intentional demo of how assignment resets the dtype -- confirm.
titanic["sex"] = "MALE"

In [None]:
# Inspect the dtype of `sex` after the scalar assignment.
titanic.info()

In [None]:
# Convert `embarked` to a categorical column and store the result.
titanic["embarked"] = titanic["embarked"].astype("category")

In [None]:
# Check the dtype of `embarked` after the conversion.
titanic.info()

## Casting with pd.to_numeric()

In [None]:
# Reload the CSV so `age` is back to its raw form, then recount its values.
titanic = pd.read_csv("data/titanic.csv")
titanic["age"].value_counts()

In [None]:
# errors="coerce" turns values that cannot be parsed as numbers into NaN instead of raising.
# The result is only displayed here, not stored.
pd.to_numeric(titanic["age"], errors="coerce")

In [None]:
# Persist the numeric conversion by assigning it back to `age`.
titanic["age"] = pd.to_numeric(titanic["age"], errors="coerce")

In [None]:
# Confirm the dtype and non-null count of `age` after pd.to_numeric.
titanic.info()

In [None]:
# Summary statistics for the converted `age` column.
titanic["age"].describe()

## isna() and dropna()

In [None]:
# Load the game stats dataset used by the isna()/dropna() cells in this section.
stats = pd.read_csv("data/game_stats.csv")

In [None]:
# Display the raw frame to see where values are missing.
stats

In [None]:
# Boolean frame of the same shape as `stats`: True wherever a value is missing.
stats.isna()

In [None]:
# Keep only the rows whose `league` value is missing.
stats.loc[stats["league"].isna()]

In [None]:
# Drop the missing entries of one column; this returns a new Series and is not stored.
stats["assists"].dropna()

In [None]:
# Bind the column to its own name, then drop its missing entries in place on that Series.
# NOTE(review): `stats` is displayed two cells later, presumably to show that the
# DataFrame keeps all of its rows -- confirm on the pandas version in use.
assists = stats["assists"]
assists.dropna(inplace=True)

In [None]:
# Show the Series after the in-place dropna.
assists

In [None]:
# Display the DataFrame for comparison with the `assists` Series.
stats

In [None]:
# With default arguments, dropna() removes every row containing at least one missing value.
# The result is only displayed, not assigned.
stats.dropna()

### `dropna()` returns a new DataFrame; it does not modify `stats` in place

In [None]:
# Display `stats` again: the dropna() result above was never assigned back.
stats

In [None]:
# Reload the CSV to start again from the raw data, then display it.
stats = pd.read_csv("data/game_stats.csv")
stats

In [None]:
# how="all" only drops rows in which every value is missing.
stats.dropna(how="all")

In [None]:
# Only `league` and `points` are checked: rows missing either one are dropped,
# while missing values in other columns are ignored.
stats.dropna(subset=["league", "points"])

In [None]:
# Drop columns, rather than rows, that contain any missing value.
stats.dropna(axis="columns")

## Filling NA values with fillna()

In [None]:
# Display the frame before any filling is applied.
stats

In [None]:
# Replace every missing value in the frame with 0; returns a new frame (not assigned here).
stats.fillna(0)

In [None]:
# Fill missing `league` values with "amateur" and store the result on the frame.
# Assigning back replaces the chained inplace=True call, which acts on a temporary
# Series under pandas Copy-on-Write (FutureWarning on pandas 2.x) and would then
# leave `stats` unchanged.
stats["league"] = stats["league"].fillna("amateur")

In [None]:
# Display the frame after the `league` fill.
stats

In [None]:
# A dict fills each listed column with its own value; unlisted columns are not filled.
# The result is only displayed, not assigned.
# NOTE(review): "NONE" is a string; if `assists` is numeric this changes its dtype -- confirm intent.
stats.fillna({"points": 0, "assists": "NONE"})

In [None]:
# Load the sales dataset used for the column-to-column fill example.
sales = pd.read_csv("data/sales.csv")

In [None]:
# Display the raw sales frame.
sales

In [None]:
# Where `shipping_zip` is missing, take the value from `billing_zip` on the same row
# (fillna with a Series aligns on the index). The result is assigned back instead of
# using a chained inplace=True call, which acts on a temporary Series under pandas
# Copy-on-Write (FutureWarning on pandas 2.x) and would leave `sales` unchanged.
sales["shipping_zip"] = sales["shipping_zip"].fillna(sales["billing_zip"])

In [None]:
# Display the frame after the `shipping_zip` fill.
sales