# Pandas, continued

In [1]:
%%HTML
<style type="text/css">
/* Give every cell of a rendered DataFrame table a solid black border
   and black text. */
table.dataframe td,
table.dataframe th {
    border: 1px black solid !important;
    color: black !important;
}
</style>

In [2]:
import pandas as pd
import numpy as np

## Cross tabulation

Cross tabulation is used to depict the relationship between a group of features or attributes, by counting how often each combination of their values occurs.

In [3]:
# Read the survey CSV (decoded as UTF-16) and preview the first rows.
df = pd.read_csv(
    filepath_or_buffer="SomervilleHappinessSurvey2015.csv",
    encoding="UTF-16",
)
df.head()

Unnamed: 0,D,X1,X2,X3,X4,X5,X6
0,0,3,3,3,4,2,4
1,0,3,2,3,5,4,3
2,1,5,3,3,3,3,5
3,0,5,4,3,3,3,5
4,0,5,4,3,3,3,5


D = decision attribute (D) with values 0 (unhappy) and 1 (happy)

X1 = the availability of information about the city services

X2 = the cost of housing

X3 = the overall quality of public schools

X4 = your trust in the local police

X5 = the maintenance of streets and sidewalks

X6 = the availability of social community events

In [4]:
# Swap the coded survey column names (D, X1..X6) for descriptive labels.
# The result is reassigned instead of using inplace=True: inplace offers no
# benefit here and pandas style guidance discourages it.
df = df.rename(
    columns={
        "D": "Happiness",
        "X1": "City_service_info",
        "X2": "Housing_cost",
        "X3": "Public_school",
        "X4": "Police_trust",
        "X5": "Infrust_development",
        "X6": "Community_events",
    }
)
df.head()

Unnamed: 0,Happiness,City_service_info,Housing_cost,Public_school,Police_trust,Infrust_development,Community_events
0,0,3,3,3,4,2,4
1,0,3,2,3,5,4,3
2,1,5,3,3,3,3,5
3,0,5,4,3,3,3,5
4,0,5,4,3,3,3,5


**The `apply` method: we can manipulate the values of a column (or a row) by passing each of them through a function with this method.**

In [5]:
# Series.apply: turn the numeric decision code into a readable label.
def _happiness_label(code):
    """Return "Happy" when ``code`` equals 1, otherwise "Unhappy"."""
    if code == 1:
        return "Happy"
    return "Unhappy"


df["Happiness"] = df["Happiness"].apply(_happiness_label)
df.head()

Unnamed: 0,Happiness,City_service_info,Housing_cost,Public_school,Police_trust,Infrust_development,Community_events
0,Unhappy,3,3,3,4,2,4
1,Unhappy,3,2,3,5,4,3
2,Happy,5,3,3,3,3,5
3,Unhappy,5,4,3,3,3,5
4,Unhappy,5,4,3,3,3,5


In [6]:
def rate_descriptor(x):
    """Translate a 1-5 rating into its text label.

    1 -> "Not_at_all", 2 -> "Unlikely", 3 -> "Fair", 4 -> "Good",
    5 -> "Excellent". Any value that compares equal to none of these
    yields None.
    """
    # Labels are ordered so that position + 1 is the rating they describe.
    labels = ("Not_at_all", "Unlikely", "Fair", "Good", "Excellent")
    for rating, label in enumerate(labels, start=1):
        if x == rating:
            return label
    return None
    
# Relabel every column after the first one using rate_descriptor.
for column in df.columns[1:]:
    df[column] = df[column].apply(rate_descriptor)

df.head()

Unnamed: 0,Happiness,City_service_info,Housing_cost,Public_school,Police_trust,Infrust_development,Community_events
0,Unhappy,Fair,Fair,Fair,Good,Unlikely,Good
1,Unhappy,Fair,Unlikely,Fair,Excellent,Good,Fair
2,Happy,Excellent,Fair,Fair,Fair,Fair,Excellent
3,Unhappy,Excellent,Good,Fair,Fair,Fair,Excellent
4,Unhappy,Excellent,Good,Fair,Fair,Fair,Excellent


In [7]:
# Counts of each Happiness value (rows) per Police_trust rating (columns).
pd.crosstab(index=df["Happiness"], columns=df["Police_trust"])

Police_trust,Excellent,Fair,Good,Not_at_all,Unlikely
Happiness,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Happy,16,24,34,2,1
Unhappy,10,24,26,1,5


In [8]:
# Counts of each Housing_cost rating (rows) per Happiness value (columns).
pd.crosstab(index=df["Housing_cost"], columns=df["Happiness"])

Happiness,Happy,Unhappy
Housing_cost,Unnamed: 1_level_1,Unnamed: 2_level_1
Excellent,5,2
Fair,24,22
Good,9,11
Not_at_all,14,16
Unlikely,25,15


In [9]:
# Read the second dataset used for the remaining crosstab examples and preview it.
titanic = pd.read_csv(filepath_or_buffer='train.csv')
titanic.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [10]:
# Counts of each Pclass value (rows) per Survived value (columns).
pd.crosstab(index=titanic["Pclass"], columns=titanic["Survived"])

Survived,0,1
Pclass,Unnamed: 1_level_1,Unnamed: 2_level_1
1,80,136
2,97,87
3,372,119


In [11]:
# Counts of each Embarked value (rows) per Survived value (columns).
pd.crosstab(index=titanic["Embarked"], columns=titanic["Survived"])

Survived,0,1
Embarked,Unnamed: 1_level_1,Unnamed: 2_level_1
C,75,93
Q,47,30
S,427,217


In [12]:
# Same table as before, with margins=True adding the "All" row and column totals.
pd.crosstab(
    index=titanic["Embarked"],
    columns=titanic["Survived"],
    margins=True,
)

Survived,0,1,All
Embarked,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
C,75,93,168
Q,47,30,77
S,427,217,644
All,549,340,889


**Comparing against multiple columns at once**

In [14]:
# Passing a list of Series builds hierarchical columns: Survived, then Sex, then Pclass.
column_levels = [titanic["Survived"], titanic["Sex"], titanic["Pclass"]]
pd.crosstab(index=titanic["Embarked"], columns=column_levels, margins=False)

Survived,0,0,0,0,0,0,1,1,1,1,1,1
Sex,female,female,female,male,male,male,female,female,female,male,male,male
Pclass,1,2,3,1,2,3,1,2,3,1,2,3
Embarked,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3
C,1,0,8,25,8,33,42,7,15,17,2,10
Q,0,0,9,1,1,36,1,2,24,0,0,3
S,2,6,55,51,82,231,46,61,33,28,15,34


**Using aggfunc and values**

In [16]:
# Mean of Age for every (Survived, Sex) combination: `values` supplies the data
# to aggregate and `aggfunc` says how. The aggregation is named by string
# because pandas 2.1+ emits a FutureWarning when a NumPy reducer such as
# np.mean is passed; "mean" yields the same table.
pd.crosstab(titanic.Survived, titanic.Sex, values=titanic.Age, aggfunc="mean")

Sex,female,male
Survived,Unnamed: 1_level_1,Unnamed: 2_level_1
0,25.046875,31.618056
1,28.847716,27.276022


In [17]:
# IPython-only introspection: the trailing `?` shows the docstring of pd.crosstab.
# NOTE(review): interactive helper; consider removing before sharing the notebook.
pd.crosstab?