# In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import codecademylib3_seaborn
import glob

# Loading all files into a dataframe:
# Sort the matches so the row order of the combined frame does not depend on
# the order in which the filesystem happens to list the files.
states_files = sorted(glob.glob("states*.csv"))
if not states_files:
    # pd.concat would fail on an empty list anyway; raise the same exception
    # type with a message that names the actual problem.
    raise ValueError('No files matching "states*.csv" were found')

df_list = [pd.read_csv(filename) for filename in states_files]

# ignore_index=True gives the combined frame a single unique 0..n-1 index
# instead of repeating every file's own row labels.
us_census = pd.concat(df_list, ignore_index=True)
print(us_census.head())

# Checking the columns and datatype of the dataframe:
print(us_census.dtypes)

# Converting the Income column to a numerical type:
# The pattern removes every "$" and "," before the numeric conversion. It is
# written as a raw string because "\$" in a normal literal is an invalid
# escape sequence that newer Python versions warn about.
us_census["Income"] = pd.to_numeric(
    us_census["Income"].replace(r"[\$,]", "", regex=True)
)

# Separating the GenderPop column into a Men column and a Women column,
# then converting both to numerical datatypes.
# NOTE(review): presumably GenderPop looks like "<men>M_<women>F" -- confirm
# against the CSV files.
gender_split = us_census["GenderPop"].str.split("_")

# (target column, position in the split list, characters to strip)
for column, position, strip_pattern in (("Men", 0, "[M,]"), ("Women", 1, "[F,]")):
    raw_counts = gender_split.str.get(position)
    cleaned_counts = raw_counts.replace(strip_pattern, "", regex=True)
    us_census[column] = pd.to_numeric(cleaned_counts)

print(us_census)

# Scatterplot of Women against Income, before the missing Women values
# are filled in:
plt.scatter(us_census["Women"], us_census["Income"])
plt.show()

# Filling in the NaN values in the Women column:
# a missing value is replaced by TotalPop minus Men for the same row.
women_from_totals = us_census["TotalPop"] - us_census["Men"]
us_census = us_census.fillna(value={"Women": women_from_totals})
print(us_census["Women"])

# Checking for duplicates:
# duplicated() flags rows that repeat an earlier row; value_counts() prints
# how many rows were flagged True / False.
duplicates = us_census.duplicated()
duplicate_counts = duplicates.value_counts()
print(duplicate_counts)
# Result noted by the author: no duplicates

# Same scatterplot again, now with the filled-in Women column:
plt.scatter(us_census["Women"], us_census["Income"])
plt.show()

# Histograms of Races:
def races(race, df=None):
    """Convert a percentage-style text column to numbers, in place.

    Every "," and "%" character is stripped from the column's values and
    the result is passed through ``pd.to_numeric``.

    Args:
        race: Name of the column to convert.
        df: DataFrame to modify. When omitted, the module-level
            ``us_census`` frame is used, as before.

    Returns:
        None. The column is overwritten on the DataFrame.
    """
    if df is None:
        # Looked up at call time, so a rebound us_census is picked up too.
        df = us_census
    df[race] = pd.to_numeric(df[race].replace("[,%]", "", regex=True))

# Convert each race column to a numerical type:
race_names = ("Hispanic", "White", "Black", "Native", "Asian", "Pacific")
for race in race_names:
    races(race)

# Filling up NaN values in the Pacific column:
# a missing value becomes 100 minus the sum of the other five race columns.
non_pacific_total = (
    us_census["Hispanic"]
    + us_census["White"]
    + us_census["Black"]
    + us_census["Native"]
    + us_census["Asian"]
)
us_census = us_census.fillna(value={"Pacific": 100 - non_pacific_total})

# Plotting Histogram:
# one data series per race column, all drawn on the same axes.
race_series = [us_census[name] for name in race_names]
plt.hist(race_series)
plt.show()