In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the raw villager table. Birthday values are year-less day-month strings
# such as "27-Jan", which read_csv's date inference cannot parse reliably (the
# preview below still shows plain text). The column is therefore read as text
# and converted in a later cell with an explicit format, so the result does
# not depend on pandas' fallback date parser.
villagers = pd.read_csv('villagers.csv')

In [3]:
# Preview the first five rows of the freshly loaded table.
villagers.head(5)

Unnamed: 0,Name,Species,Gender,Personality,Hobby,Birthday,Catchphrase,Favorite Song,Style 1,Style 2,Color 1,Color 2,Wallpaper,Flooring,Furniture List,Filename,Unique Entry ID
0,Admiral,Bird,Male,Cranky,Nature,27-Jan,aye aye,Steep Hill,Cool,Cool,Black,Blue,dirt-clod wall,tatami,717;1849;7047;2736;787;5970;3449;3622;3802;410...,brd06,B3RyfNEqwGmcccRC3
1,Agent S,Squirrel,Female,Peppy,Fitness,2-Jul,sidekick,Go K.K. Rider,Active,Simple,Blue,Black,concrete wall,colorful tile flooring,7845;7150;3468;4080;290;3971;3449;1708;4756;25...,squ05,SGMdki6dzpDZyXAw5
2,Agnes,Pig,Female,Big Sister,Play,21-Apr,snuffle,K.K. House,Simple,Elegant,Pink,White,gray molded-panel wall,arabesque flooring,4129;7236;7235;7802;896;3428;4027;7325;3958;71...,pig17,jzWCiDPm9MqtCfecP
3,Al,Gorilla,Male,Lazy,Fitness,18-Oct,ayyyeee,Go K.K. Rider,Active,Active,Red,White,concrete wall,green rubber flooring,1452;4078;4013;833;4116;3697;7845;3307;3946;39...,gor08,LBifxETQJGEaLhBjC
4,Alfonso,Alligator,Male,Lazy,Play,9-Jun,it'sa me,Forest Life,Simple,Simple,Red,Blue,yellow playroom wall,green honeycomb tile,4763;3205;3701;1557;3623;85;3208;3584;4761;121...,crd00,REpd8KxB8p9aGBRSE


In [4]:
# Turn the day-month strings (e.g. "27-Jan") into timestamps. The format has no
# year component, so the parsed dates are placed in 1900 (see the next preview).
villagers["Birthday"] = pd.to_datetime(
    villagers["Birthday"],
    format="%d-%b",
)


In [5]:
# Encode Gender as an integer flag: Female -> 0, Male -> 1.
villagers["Gender"] = villagers["Gender"].replace({"Female": 0, "Male": 1})

## Changing the Personality to numerical values
The game has 8 villager personality types in total, split into 4 for each gender, so I've decided to map each female personality and its male counterpart to the same value. I thought it made more sense to "degender" the personality this way, as we already have a Gender value that can be combined with it to recover the exact Personality.

In [6]:
# Collapse each female/male personality pair onto one shared integer code.
villagers["Personality"] = villagers["Personality"].replace(
    {
        "Normal": 0,
        "Lazy": 0,
        "Peppy": 1,
        "Jock": 1,
        "Snooty": 2,
        "Cranky": 2,
        "Big Sister": 3,
        "Smug": 3,
    }
)

In [7]:
# Preview the first five rows after the Birthday, Gender and Personality steps.
villagers.head(5)

Unnamed: 0,Name,Species,Gender,Personality,Hobby,Birthday,Catchphrase,Favorite Song,Style 1,Style 2,Color 1,Color 2,Wallpaper,Flooring,Furniture List,Filename,Unique Entry ID
0,Admiral,Bird,1,2,Nature,1900-01-27,aye aye,Steep Hill,Cool,Cool,Black,Blue,dirt-clod wall,tatami,717;1849;7047;2736;787;5970;3449;3622;3802;410...,brd06,B3RyfNEqwGmcccRC3
1,Agent S,Squirrel,0,1,Fitness,1900-07-02,sidekick,Go K.K. Rider,Active,Simple,Blue,Black,concrete wall,colorful tile flooring,7845;7150;3468;4080;290;3971;3449;1708;4756;25...,squ05,SGMdki6dzpDZyXAw5
2,Agnes,Pig,0,3,Play,1900-04-21,snuffle,K.K. House,Simple,Elegant,Pink,White,gray molded-panel wall,arabesque flooring,4129;7236;7235;7802;896;3428;4027;7325;3958;71...,pig17,jzWCiDPm9MqtCfecP
3,Al,Gorilla,1,0,Fitness,1900-10-18,ayyyeee,Go K.K. Rider,Active,Active,Red,White,concrete wall,green rubber flooring,1452;4078;4013;833;4116;3697;7845;3307;3946;39...,gor08,LBifxETQJGEaLhBjC
4,Alfonso,Alligator,1,0,Play,1900-06-09,it'sa me,Forest Life,Simple,Simple,Red,Blue,yellow playroom wall,green honeycomb tile,4763;3205;3701;1557;3623;85;3208;3584;4761;121...,crd00,REpd8KxB8p9aGBRSE


## Changing the Species to numerical values

In [8]:
# Treat "Cow" and "Bull" as one species by relabelling both as "Cattle".
villagers["Species"] = villagers["Species"].replace(
    {"Cow": "Cattle", "Bull": "Cattle"}
)

In [9]:
# Collect the distinct species labels in ascending order; this order is what
# later determines each species' integer code.
species = list(villagers["Species"].unique())
species.sort()
print(species)
len(species)

['Alligator', 'Anteater', 'Bear', 'Bird', 'Cat', 'Cattle', 'Chicken', 'Cub', 'Deer', 'Dog', 'Duck', 'Eagle', 'Elephant', 'Frog', 'Goat', 'Gorilla', 'Hamster', 'Hippo', 'Horse', 'Kangaroo', 'Koala', 'Lion', 'Monkey', 'Mouse', 'Octopus', 'Ostrich', 'Penguin', 'Pig', 'Rabbit', 'Rhino', 'Sheep', 'Squirrel', 'Tiger', 'Wolf']


34

In [10]:
# Map every species label to its position in the sorted list (0-based).
species_dict = {label: code for code, label in enumerate(species)}
print(species_dict)

{'Alligator': 0, 'Anteater': 1, 'Bear': 2, 'Bird': 3, 'Cat': 4, 'Cattle': 5, 'Chicken': 6, 'Cub': 7, 'Deer': 8, 'Dog': 9, 'Duck': 10, 'Eagle': 11, 'Elephant': 12, 'Frog': 13, 'Goat': 14, 'Gorilla': 15, 'Hamster': 16, 'Hippo': 17, 'Horse': 18, 'Kangaroo': 19, 'Koala': 20, 'Lion': 21, 'Monkey': 22, 'Mouse': 23, 'Octopus': 24, 'Ostrich': 25, 'Penguin': 26, 'Pig': 27, 'Rabbit': 28, 'Rhino': 29, 'Sheep': 30, 'Squirrel': 31, 'Tiger': 32, 'Wolf': 33}


In [11]:
# Swap each species label for its integer code. The result is assigned back to
# the column rather than using inplace=True on the selected Series: that
# chained in-place form triggers a warning in recent pandas and, with
# Copy-on-Write enabled, leaves the DataFrame unchanged.
villagers["Species"] = villagers["Species"].replace(species_dict)

In [12]:
# Preview the first five rows after the Species encoding step.
villagers.head(5)

Unnamed: 0,Name,Species,Gender,Personality,Hobby,Birthday,Catchphrase,Favorite Song,Style 1,Style 2,Color 1,Color 2,Wallpaper,Flooring,Furniture List,Filename,Unique Entry ID
0,Admiral,3,1,2,Nature,1900-01-27,aye aye,Steep Hill,Cool,Cool,Black,Blue,dirt-clod wall,tatami,717;1849;7047;2736;787;5970;3449;3622;3802;410...,brd06,B3RyfNEqwGmcccRC3
1,Agent S,31,0,1,Fitness,1900-07-02,sidekick,Go K.K. Rider,Active,Simple,Blue,Black,concrete wall,colorful tile flooring,7845;7150;3468;4080;290;3971;3449;1708;4756;25...,squ05,SGMdki6dzpDZyXAw5
2,Agnes,27,0,3,Play,1900-04-21,snuffle,K.K. House,Simple,Elegant,Pink,White,gray molded-panel wall,arabesque flooring,4129;7236;7235;7802;896;3428;4027;7325;3958;71...,pig17,jzWCiDPm9MqtCfecP
3,Al,15,1,0,Fitness,1900-10-18,ayyyeee,Go K.K. Rider,Active,Active,Red,White,concrete wall,green rubber flooring,1452;4078;4013;833;4116;3697;7845;3307;3946;39...,gor08,LBifxETQJGEaLhBjC
4,Alfonso,0,1,0,Play,1900-06-09,it'sa me,Forest Life,Simple,Simple,Red,Blue,yellow playroom wall,green honeycomb tile,4763;3205;3701;1557;3623;85;3208;3584;4761;121...,crd00,REpd8KxB8p9aGBRSE
