## SPEED DATING EXPERIMENT (classification)

In [15]:
import os
import numpy as np
import pandas as pd
import csv
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.api as sm
import statsmodels.formula.api as smf
from sklearn import linear_model, feature_selection, neighbors, metrics, grid_search, cross_validation

pd.set_option('display.max_rows', 10)
pd.set_option('display.notebook_repr_html', True)
pd.set_option('display.max_columns', 10)

%matplotlib inline
plt.style.use('ggplot')


In [16]:
df_raw = pd.read_csv(os.path.join('..', 'CODE', 'speed-dating-experiment', 'Speed Dating Data.csv'))

In [17]:
df_raw

Unnamed: 0,iid,id,gender,idg,condtn,...,attr5_3,sinc5_3,intel5_3,fun5_3,amb5_3
0,1,1.0,0,1,1,...,,,,,
1,1,1.0,0,1,1,...,,,,,
2,1,1.0,0,1,1,...,,,,,
3,1,1.0,0,1,1,...,,,,,
4,1,1.0,0,1,1,...,,,,,
...,...,...,...,...,...,...,...,...,...,...,...
8373,552,22.0,1,44,2,...,9.0,5.0,9.0,5.0,6.0
8374,552,22.0,1,44,2,...,9.0,5.0,9.0,5.0,6.0
8375,552,22.0,1,44,2,...,9.0,5.0,9.0,5.0,6.0
8376,552,22.0,1,44,2,...,9.0,5.0,9.0,5.0,6.0


In [18]:
df_raw.columns

Index([u'iid', u'id', u'gender', u'idg', u'condtn', u'wave', u'round',
       u'position', u'positin1', u'order',
       ...
       u'attr3_3', u'sinc3_3', u'intel3_3', u'fun3_3', u'amb3_3', u'attr5_3',
       u'sinc5_3', u'intel5_3', u'fun5_3', u'amb5_3'],
      dtype='object', length=195)

In [19]:
df_raw.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8378 entries, 0 to 8377
Columns: 195 entries, iid to amb5_3
dtypes: float64(174), int64(13), object(8)
memory usage: 12.5+ MB


In [20]:
df = df_raw[df_raw.columns[0:]]

### Dropping Uncessisary Columns:

In [21]:
df.drop('position', axis = 1, inplace = True)
df.drop('positin1', axis = 1, inplace = True)
df.drop('field', axis = 1, inplace = True)
df.drop('field_cd', axis = 1, inplace = True)
df.drop('undergrd', axis = 1, inplace = True)
df.drop('mn_sat', axis = 1, inplace = True)
df.drop('tuition', axis = 1, inplace = True)
df.drop('from', axis = 1, inplace = True)
df.drop('zipcode', axis = 1, inplace = True)
df.drop('income', axis = 1, inplace = True)
df.drop('career', axis = 1, inplace = True)
df.drop('career_c', axis = 1, inplace = True)

df.drop('id', axis = 1, inplace = True)
df.drop('idg', axis = 1, inplace = True)
df.drop('order', axis = 1, inplace = True)
df.drop('partner', axis = 1, inplace = True)
df.drop('partner', axis = 1, inplace = True)
df.drop('int_corr', axis = 1, inplace = True)
df.drop('race_o', axis = 1, inplace = True)
df.drop('race', axis = 1, inplace = True)

In [23]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8378 entries, 0 to 8377
Columns: 183 entries, iid to amb5_3
dtypes: float64(171), int64(12)
memory usage: 11.7 MB


In [220]:
df_initial_survey = df[['date', 'go_out', 'sports', 'tvsports', 'exercise', 'dining', 'museums', 'art', 'hiking', 'gaming', 'clubbing', 'reading', 'tv', 'theater', 'movies', 'concerts', 'music', 'shopping', 'yoga']]

In [221]:
df_initial_survey

Unnamed: 0,date,go_out,sports,tvsports,exercise,...,movies,concerts,music,shopping,yoga
0,7.0,1.0,9.0,2.0,8.0,...,10.0,10.0,9.0,8.0,1.0
1,7.0,1.0,9.0,2.0,8.0,...,10.0,10.0,9.0,8.0,1.0
2,7.0,1.0,9.0,2.0,8.0,...,10.0,10.0,9.0,8.0,1.0
3,7.0,1.0,9.0,2.0,8.0,...,10.0,10.0,9.0,8.0,1.0
4,7.0,1.0,9.0,2.0,8.0,...,10.0,10.0,9.0,8.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...
8373,2.0,1.0,8.0,2.0,5.0,...,9.0,10.0,10.0,7.0,3.0
8374,2.0,1.0,8.0,2.0,5.0,...,9.0,10.0,10.0,7.0,3.0
8375,2.0,1.0,8.0,2.0,5.0,...,9.0,10.0,10.0,7.0,3.0
8376,2.0,1.0,8.0,2.0,5.0,...,9.0,10.0,10.0,7.0,3.0


In [22]:
df.columns

Index([u'iid', u'id', u'gender', u'idg', u'condtn', u'wave', u'round',
       u'order', u'partner', u'pid',
       ...
       u'attr3_3', u'sinc3_3', u'intel3_3', u'fun3_3', u'amb3_3', u'attr5_3',
       u'sinc5_3', u'intel5_3', u'fun5_3', u'amb5_3'],
      dtype='object', length=183)

In [224]:
df.drop(df_initial_survey, inplace = True, axis = 1)

In [225]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8378 entries, 0 to 8377
Columns: 164 entries, iid to amb5_3
dtypes: float64(152), int64(12)
memory usage: 10.5 MB


In [24]:
df_raw.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8378 entries, 0 to 8377
Columns: 195 entries, iid to amb5_3
dtypes: float64(174), int64(13), object(8)
memory usage: 12.5+ MB


In [25]:
df.count()

iid         8378
id          8377
gender      8378
idg         8378
condtn      8378
            ... 
attr5_3     2016
sinc5_3     2016
intel5_3    2016
fun5_3      2016
amb5_3      2016
dtype: int64

In [228]:
df_12 = df[df.wave == 12]

df_12

Unnamed: 0,iid,id,gender,idg,condtn,...,attr5_3,sinc5_3,intel5_3,fun5_3,amb5_3
4452,294,1.0,0,1,2,...,,,,,
4453,294,1.0,0,1,2,...,,,,,
4454,294,1.0,0,1,2,...,,,,,
4455,294,1.0,0,1,2,...,,,,,
4456,294,1.0,0,1,2,...,,,,,
...,...,...,...,...,...,...,...,...,...,...,...
4839,321,14.0,1,27,2,...,,,,,
4840,321,14.0,1,27,2,...,,,,,
4841,321,14.0,1,27,2,...,,,,,
4842,321,14.0,1,27,2,...,,,,,


In [227]:
df_12.groupby(['iid'])['match'].sum().describe()

count    28.000000
mean      1.500000
std       1.427248
min       0.000000
25%       0.750000
50%       1.000000
75%       2.000000
max       6.000000
Name: match, dtype: float64

****

***
## General Variable KEY:

| Variable | Description |
| ---| ---|
|attr | Attractive|
|sinc |Sincere  |
|intel | Intelligent|
| fun | Fun|
| amb | Ambitious|
| shar |Shared Interests/Hobbies

***Each attribute has a unique code at the end of the variable which references the survey question and when in the experiment the question was being asked*** (signup, during dating expirement, after dating experiment)

Example: 

attr**1_1** 

Variable: attractivness

Question: 'what do you look for in the opposite sex?' 

Point in experiment: signup survey

*vs.*

attr**1_2** 

Variable: attractivness

Question: 'what do you look for in the opposite sex?' 

Point in experiment: after dating event

***

## QUESTION 1:	
### Does one’s perception of themselves predict their dating outcomes? Does this differ by gender?

**Hypothesis**: people who have lower self esteem (i.e. negatively evaluate themselves by giving lower scores on the attribute scale) will get less dates/matches; while those who give themselves higher ratings will get more.  Women are more likely to give themselves more critical ratings than men, thus negatively affecting their outcome. 



| Variable CODE | Scale | When during Experiment? |Question| 
| :------:| :------:| :------: |:------|
|  **3_1**| 1-10 | Signup| Based on what you think the opposite sex looks for in a date, how do you think you measure up?
|**3_2**| 1-10| After event| Based on what you think the opposite sex looks for in a date, how do you think you measure up?
|**5_1**| 1-10| Signup|How do you think others perceive you? |
|**5_2**| 1-10| After event|How do you think others perceive you? |
|**3_s**| 1-10| During event|Rate your opinion of your own attributes  |


**exphappy**: Overall, on a scale of 1-10, how happy do you expect to be with the people you meet during the speed-dating event.

**expnum**: Out of the 20 people you will meet, how many do you expect will be interested in dating you? 

**match_es**: How many matches do you estimate you will get (a match occurs when you and your partner both check “Yes” next to decision)?: 

**match**	 (1=yes, 0=no)

**dec**: decision (1=yes, 0 = no)

**dec_o**: decision of partner (1=yes, 0 = no)

**round**: number of people that met in wave

**iid**: 	unique subject number, group(wave id gender): use this to count # of matches someone got 

**gender** (1=M | 0 =F)


> ### ISSUES/QUESTIONS
- FIND WAYS TO WEIGHT THE AVERAGE OF SCORES
- FIND A WAY TO LOOK AT SELF PERCEPTION VS. WHAT OTHERS THINK 
(i.e. someone could give themselves a '10' attractive score' but other's only gave them an '7' but they get the most dates b/c have high self-esteem. 
- HOW TO ADD UP THE # OF MATCHES SOMEONE GETS (reference iid#?)
- WHICH VARIABLES ABOVE SHOULD I USE? SHOULD I WEIGH THEM? THEY HAVE DIFFERENT # OF OBSERVATIONS  

In [26]:
df.groupby(['iid'])['match'].sum()

iid
1      4
2      2
3      0
4      2
5      2
      ..
548    5
549    5
550    4
551    2
552    6
Name: match, dtype: int64

#### Attractivness at 3_1 (# of observations = 8273)

In [27]:
len(df.attr3_1.dropna())

8273

In [28]:
len(df.sinc3_1.dropna())

8273

#### Attractivness at 3_2 (# of observations = 7463)

In [29]:
df.attr3_2.unique()

array([  6.,   7.,  nan,   5.,  10.,   8.,   3.,   9.,   4.,   2.])

In [30]:
len(df.attr3_2.dropna())

7463

In [31]:
len(df.sinc3_2.dropna())

7463

#### Attractivness at  5_1 (# of observations = 4906)

In [32]:
len(df.attr5_1.dropna())

4906

In [33]:
len(df.sinc5_1.dropna())

4906

#### Attractivness at 5_2 (# of observations = 4377)

In [34]:
len(df.attr5_2.dropna())

4377

In [35]:
len(df.sinc5_2.dropna())

4377

#### Attractivness at  3_s (# of observations = 4000)

In [36]:
len(df.attr3_s.dropna())

4000

In [37]:
len(df.sinc3_s.dropna())

4000

### Look at exphappy, expnum and match_es  (&round)

___expnum___:  Overall, on a scale of 1-10, how happy do you expect to be with the people you meet during the speed-dating event

In [38]:
len(df.expnum.dropna())

1800

In [39]:
df.expnum.describe()

#data during signup 

count    1800.000000
mean        5.570556
std         4.762569
min         0.000000
25%         2.000000
50%         4.000000
75%         8.000000
max        20.000000
Name: expnum, dtype: float64

***match_es***: How many matches do you estimate you will get (a match occurs when you and your partner both check “Yes” next to decision)? (this was during the experiment after meeting people)

In [40]:
df.match_es.describe()

#data during experiment

count    7205.000000
mean        3.207814
std         2.444813
min         0.000000
25%         2.000000
50%         3.000000
75%         4.000000
max        18.000000
Name: match_es, dtype: float64

***round***: number of people that met in wave

In [41]:
df[['round']].describe()

Unnamed: 0,round
count,8378.0
mean,16.872046
std,4.358458
min,5.0
25%,14.0
50%,18.0
75%,20.0
max,22.0


>Observations: max # of people that a round met with was 22 people; match_es max # of people thought they would match with was 18

***exphappy***: Overall, on a scale of 1-10, how happy do you expect to be with the people you meet during the speed-dating event.

In [42]:
df.exphappy.describe()

count    8277.000000
mean        5.534131
std         1.734059
min         1.000000
25%         5.000000
50%         6.000000
75%         7.000000
max        10.000000
Name: exphappy, dtype: float64

### Look at 3_1  - attractiveness

Q: Based on what do you think the opposite sex looks for in a date, how do you think you measure up?  **at signup**

In [43]:
subset_df = df[df.columns[0:]]

In [44]:
len(subset_df.attr3_1)

8378

In [45]:
subset_df.attr3_1.dropna(inplace = True)

#removing NaN values

In [46]:
len(subset_df.attr3_1)

8273

In [47]:
subset_df

Unnamed: 0,iid,id,gender,idg,condtn,...,attr5_3,sinc5_3,intel5_3,fun5_3,amb5_3
0,1,1.0,0,1,1,...,,,,,
1,1,1.0,0,1,1,...,,,,,
2,1,1.0,0,1,1,...,,,,,
3,1,1.0,0,1,1,...,,,,,
4,1,1.0,0,1,1,...,,,,,
...,...,...,...,...,...,...,...,...,...,...,...
8373,552,22.0,1,44,2,...,9.0,5.0,9.0,5.0,6.0
8374,552,22.0,1,44,2,...,9.0,5.0,9.0,5.0,6.0
8375,552,22.0,1,44,2,...,9.0,5.0,9.0,5.0,6.0
8376,552,22.0,1,44,2,...,9.0,5.0,9.0,5.0,6.0


In [48]:
subset_df.attr3_1.describe()

count    8273.000000
mean        7.084733
std         1.395783
min         2.000000
25%         6.000000
50%         7.000000
75%         8.000000
max        10.000000
Name: attr3_1, dtype: float64

#### Looking at match_es

In [49]:
subset_df.match_es.describe()

count    7205.000000
mean        3.207814
std         2.444813
min         0.000000
25%         2.000000
50%         3.000000
75%         4.000000
max        18.000000
Name: match_es, dtype: float64

In [50]:
len(subset_df.match_es)

8378

In [51]:
len(subset_df.match_es.dropna())

7205

In [52]:
subset_df.match_es.dropna(inplace = True)

#removing NaN values

In [53]:
len(subset_df.match_es)

7205

#### Looking at match (# of matches)

In [54]:
subset_df.match.describe()

count    8378.000000
mean        0.164717
std         0.370947
min         0.000000
25%         0.000000
50%         0.000000
75%         0.000000
max         1.000000
Name: match, dtype: float64

In [55]:
len(subset_df.match)

8378

In [56]:
len(subset_df.match.dropna())

8378

In [57]:
subset_df.attr3_1.value_counts()

7.0     2914
8.0     2217
6.0     1100
9.0      729
5.0      642
10.0     268
4.0      238
3.0      145
2.0       20
Name: attr3_1, dtype: int64

In [58]:
dummy_ranks = pd.get_dummies(subset_df.attr3_1, prefix = 'attr_3_1_self')

In [59]:
dummy_ranks

Unnamed: 0,attr_3_1_self_2.0,attr_3_1_self_3.0,attr_3_1_self_4.0,attr_3_1_self_5.0,attr_3_1_self_6.0,attr_3_1_self_7.0,attr_3_1_self_8.0,attr_3_1_self_9.0,attr_3_1_self_10.0
0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...
8373,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
8374,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
8375,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
8376,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0


In [60]:
dummy_ranks.rename(columns={'attr_3_1_self_1.0': 'attr_3_1_self_1',
                        'attr_3_1_self_2.0': 'attr_3_1_self_2',
                        'attr_3_1_self_3.0': 'attr_3_1_self_3',
                        'attr_3_1_self_4.0': 'attr_3_1_self_4',
                        'attr_3_1_self_5.0': 'attr_3_1_self_5',
                        'attr_3_1_self_6.0': 'attr_3_1_self_6',
                        'attr_3_1_self_7.0': 'attr_3_1_self_7',
                        'attr_3_1_self_8.0': 'attr_3_1_self_8',
                        'attr_3_1_self_9.0': 'attr_3_1_self_9',
                        'attr_3_1_self_10.0': 'attr_3_1_self_10',}, inplace = True)


dummy_ranks

Unnamed: 0,attr_3_1_self_2,attr_3_1_self_3,attr_3_1_self_4,attr_3_1_self_5,attr_3_1_self_6,attr_3_1_self_7,attr_3_1_self_8,attr_3_1_self_9,attr_3_1_self_10
0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...
8373,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
8374,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
8375,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
8376,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0


In [61]:
subset_df = subset_df.join([dummy_ranks])

In [62]:
##subset_df.drop('attr3_1', axis = 1, inplace = True)

#### Look at Attractivness (3_1) & Match Rating

In [63]:
pd.crosstab(subset_df.attr_3_1_self_10, subset_df.match, margins=True)

match,0,1,All
attr_3_1_self_10,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0.0,6713,1292,8005
1.0,209,59,268
All,6998,1380,8378


> Odds of getting a match if found themselves a 10/10 attractive vs. found themselves below a 10 = 59:209 

### Look at match & attractivness  (3_1, 3_2, 5_1, 5_2, 3_s) compared by gender 

#### FEMALE:

In [64]:
df_gender_female = subset_df[subset_df.gender == 0]
df_gender_female[['attr3_1']].describe()

#q: based on what you think the opposite gender looks for in a mate, 
#   how do you measure up? (signup)

Unnamed: 0,attr3_1
count,4117.0
mean,7.219092
std,1.336886
min,2.0
25%,7.0
50%,7.0
75%,8.0
max,10.0


>ave rating for women: 7.22

In [65]:
df_gender_female[['attr3_2']].describe()

#q: based on what you think the opposite gender looks for in a mate, 
#   how do you measure up? (after event)

Unnamed: 0,attr3_2
count,3653.0
mean,7.234054
std,1.329677
min,2.0
25%,7.0
50%,7.0
75%,8.0
max,10.0


In [66]:
df_gender_female[['attr5_1']].describe()

#q: how do you think others perceive you? (signup)

Unnamed: 0,attr5_1
count,2459.0
mean,7.017893
std,1.414963
min,3.0
25%,6.0
50%,7.0
75%,8.0
max,10.0


In [67]:
df_gender_female[['attr5_2']].describe()

#q: how do you think others perceive you? (after event)

Unnamed: 0,attr5_2
count,2167.0
mean,6.808952
std,1.366886
min,2.0
25%,6.0
50%,7.0
75%,8.0
max,10.0


In [68]:
df_gender_female[['attr3_s']].describe()

#q: rate opinion of attributes (during event)

Unnamed: 0,attr3_s
count,1940.0
mean,7.462887
std,1.266526
min,3.0
25%,7.0
50%,8.0
75%,8.0
max,10.0


**Look at women w/ self rating of '10' for 3_1:**

In [69]:
pd.crosstab(df_gender_female.attr_3_1_self_10, df_gender_female.match, margins=True)

match,0,1,All
attr_3_1_self_10,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0.0,3385,667,4052
1.0,58,7,65
All,3494,690,4184


Probability of women getting a date if gave themselves higher 'attractive' scores:


In [70]:
p = (7./65.)
per = p*100
per

10.76923076923077

Odds of women getting a date if rating self as '10' for attractive vs. other scores:  __7:58__

Odds ratio: odds of women getting a date if rated themselves as '10' attractiveness vs. women who rated themselves lower 

In [71]:
##Odds ratio: 

o = (7./58.) / (667./3385.)
o

0.6124954763997312

> Odds of getting a date if you were a women who rated themselves as a '10' vs those who rated themselves lower is 40% higher

#### MALE:

In [72]:
df_gender_male = subset_df[subset_df.gender == 1]
df_gender_male[['attr3_1']].describe()

Unnamed: 0,attr3_1
count,4156.0
mean,6.951636
std,1.439621
min,2.0
25%,6.0
50%,7.0
75%,8.0
max,10.0


>ave rating for men at signup: 6.95

In [73]:
df_gender_male[['attr3_2']].describe()

Unnamed: 0,attr3_2
count,3810.0
mean,7.020997
std,1.402499
min,2.0
25%,6.0
50%,7.0
75%,8.0
max,10.0


>ave rating for men after:7.02

In [74]:
df_gender_male[['attr5_1']].describe()

Unnamed: 0,attr5_1
count,2447.0
mean,6.86555
std,1.574891
min,2.0
25%,6.0
50%,7.0
75%,8.0
max,10.0


In [75]:
df_gender_male[['attr5_2']].describe()

Unnamed: 0,attr5_2
count,2210.0
mean,6.846606
std,1.453209
min,2.0
25%,6.0
50%,7.0
75%,8.0
max,10.0


In [76]:
df_gender_male[['attr3_s']].describe()

Unnamed: 0,attr3_s
count,2060.0
mean,6.974272
std,1.504911
min,3.0
25%,6.0
50%,7.0
75%,8.0
max,10.0


**Look at men w/ self rating of '10' at 3_1:**

In [77]:
pd.crosstab(df_gender_male.attr_3_1_self_10, df_gender_male.match, margins=True)

match,0,1,All
attr_3_1_self_10,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0.0,3328,625,3953
1.0,151,52,203
All,3504,690,4194


Probability of men getting a date if gave themselves higher 'attractive' scores:


In [78]:
p = (52./151.)
per = p*100
per

34.437086092715234

> 34% more likely to get a match

Odds ratio: odds of men getting a date if rated themselves as '10' attractiveness vs. men who rated themselves lower 

In [79]:
##Odds ratio: 

o = (52./151.) / (625./3328.)
o

1.8337059602649004

> Odds of getting a date if a male and rated themselves as '10' attractiness is 1.83:1 (183% greater chances). 

### CALCUALTE THE # OF MATCHES SOMEONE GOT - then do a correlation

> need to add up the # of matches someone gets  | need to reference id#

In [80]:
df_gender_male[['attr3_1', 'sinc3_1', 'intel3_1', 'fun3_1', 'amb3_1', 'match']].corr()

Unnamed: 0,attr3_1,sinc3_1,intel3_1,fun3_1,amb3_1,match
attr3_1,1.0,0.139699,0.411334,0.47682,0.384919,0.048772
sinc3_1,0.139699,1.0,0.263508,0.204126,0.199065,0.009998
intel3_1,0.411334,0.263508,1.0,0.275416,0.346521,0.022677
fun3_1,0.47682,0.204126,0.275416,1.0,0.373969,0.086714
amb3_1,0.384919,0.199065,0.346521,0.373969,1.0,0.050591
match,0.048772,0.009998,0.022677,0.086714,0.050591,1.0


### Look for rows of data that have values for 3_1, 3_2, 5_1, 5_2, 3_s

## QUESTION 2:	

### Does one’s perception of their gender generalizations differ from their own evaluations of what’s important when it comes to selecting mates? 

**Question 2B**: *Does this differ from self-evaluations (Q3)?  E.g.: do men rate ‘attractiveness’ as less important for their own dating choices but more important for other men?*

**Hypothesis**: men will rate ‘attractiveness’ as less important for their own dating choices but more important for other men’s decisions when choosing a partner. 



| Variable CODE | Scale | When during Experiment? |Question| 
| :------:| :------:| :------: |:------|
|  **4_1**| 100pts | Signup| what you think MOST of your fellow men/women look for in the opposite sex.
|**4_2**| 100pts| After event| what you think MOST of your fellow men/women look for in the opposite sex.

gender (1=M | 0 =F)


In [81]:
df[['attr4_1', 'sinc4_1', 'intel4_1', 'fun4_1', 'amb4_1', 'shar4_1']].describe()

Unnamed: 0,attr4_1,sinc4_1,intel4_1,fun4_1,amb4_1,shar4_1
count,6489.0,6489.0,6489.0,6489.0,6489.0,6467.0
mean,26.39436,11.071506,12.636308,15.566805,9.780089,11.014845
std,16.297045,6.659233,6.717476,7.328256,6.998428,6.06015
min,5.0,0.0,0.0,0.0,0.0,0.0
25%,10.0,6.0,8.0,10.0,5.0,7.0
50%,25.0,10.0,10.0,15.0,10.0,10.0
75%,35.0,15.0,16.0,20.0,15.0,15.0
max,95.0,35.0,35.0,45.0,50.0,40.0


**What MEN think OTHER MEN look for:**

***(signup survey)***

In [82]:
df_gender_male[['attr4_1', 'sinc4_1', 'intel4_1', 'fun4_1', 'amb4_1', 'shar4_1']].describe()

Unnamed: 0,attr4_1,sinc4_1,intel4_1,fun4_1,amb4_1,shar4_1
count,3246.0,3246.0,3246.0,3246.0,3246.0,3246.0
mean,28.075786,10.774184,12.163894,16.204251,7.69008,10.871534
std,17.333895,6.886969,7.144315,7.738396,5.834498,6.220683
min,5.0,0.0,0.0,0.0,0.0,0.0
25%,10.0,5.0,7.0,10.0,4.0,5.0
50%,25.0,10.0,10.0,15.0,6.0,10.0
75%,40.0,15.0,17.5,20.0,10.0,15.0
max,95.0,35.0,35.0,40.0,30.0,30.0


***(after dating survey)***

In [109]:
df_gender_male[['attr4_2', 'sinc4_2', 'intel4_2', 'fun4_2', 'amb4_2', 'shar4_2']].describe()

Unnamed: 0,attr4_2,sinc4_2,intel4_2,fun4_2,amb4_2,shar4_2
count,2940.0,2940.0,2940.0,2940.0,2940.0,2940.0
mean,28.953401,11.112585,11.188095,15.755442,8.095918,10.941497
std,17.705471,6.395381,5.919357,8.044305,5.675978,6.872334
min,7.0,0.0,0.0,0.0,0.0,0.0
25%,15.0,7.0,7.0,9.0,5.0,7.0
50%,25.0,10.0,10.0,15.0,7.0,10.0
75%,40.0,15.0,15.0,20.0,10.0,15.0
max,100.0,30.0,30.0,50.0,30.0,40.0


**What MEN ACTUALLY state they look for:**

***(signup survey)***

In [107]:
df_gender_male[['attr1_1', 'sinc1_1', 'intel1_1', 'fun1_1', 'amb1_1', 'shar1_1']].describe()

Unnamed: 0,attr1_1,sinc1_1,intel1_1,fun1_1,amb1_1,shar1_1
count,4174.0,4174.0,4174.0,4164.0,4154.0,4154.0
mean,26.921689,16.498436,19.537374,17.763893,8.552829,10.996574
std,13.669663,7.212455,6.709546,6.573848,5.985803,6.7702
min,6.67,0.0,0.0,0.0,0.0,0.0
25%,20.0,10.0,16.67,15.0,5.0,5.0
50%,23.905,17.39,20.0,18.75,10.0,10.0
75%,30.0,20.0,23.08,20.0,12.0,15.34
max,100.0,40.0,42.86,50.0,53.0,30.0


***(after dating survey):***

In [110]:
df_gender_male[['attr1_2', 'sinc1_2', 'intel1_2', 'fun1_2', 'amb1_2', 'shar1_2']].describe()

Unnamed: 0,attr1_2,sinc1_2,intel1_2,fun1_2,amb1_2,shar1_2
count,3810.0,3810.0,3810.0,3810.0,3810.0,3810.0
mean,30.344913,15.067667,16.655877,17.837824,8.475916,11.53543
std,15.645742,6.865304,6.730171,6.258742,5.73553,6.385931
min,10.0,0.0,0.0,0.0,0.0,0.0
25%,20.0,10.0,15.0,15.0,5.0,8.51
50%,25.0,15.0,18.0,19.05,10.0,10.0
75%,40.0,20.0,20.0,20.0,12.74,15.0
max,85.0,30.0,40.0,40.0,20.0,30.0


***(during event - per person)***

In [121]:
df_gender_male[['attr', 'sinc', 'intel', 'fun', 'amb', 'shar']].describe()

Unnamed: 0,attr,sinc,intel,fun,amb,shar
count,4093.0,4047.0,4045.0,4027.0,3844.0,3703.0
mean,6.460029,7.25068,7.289988,6.519866,6.603278,5.540373
std,1.858675,1.611979,1.484826,1.8375,1.723994,2.096485
min,0.0,0.0,0.0,0.0,0.0,0.0
25%,5.0,6.0,6.0,5.0,5.0,4.0
50%,7.0,7.0,7.0,7.0,7.0,6.0
75%,8.0,8.0,8.0,8.0,8.0,7.0
max,10.0,10.0,10.0,10.0,10.0,10.0


*** (what men said really mattered based on actual dates):***

In [123]:
df_gender_male[['attr7_2', 'sinc7_2', 'intel7_2', 'fun7_2', 'amb7_2', 'shar7_2']].describe()

Unnamed: 0,attr7_2,sinc7_2,intel7_2,fun7_2,amb7_2,shar7_2
count,957.0,947.0,957.0,957.0,947.0,947.0
mean,37.314525,11.684266,14.986416,19.342738,6.386484,10.394931
std,17.56912,7.807312,7.581001,8.842626,6.692786,7.638385
min,10.0,0.0,0.0,0.0,0.0,0.0
25%,25.0,5.0,10.0,15.0,0.0,5.0
50%,30.0,10.0,15.0,20.0,5.0,10.0
75%,50.0,20.0,20.0,23.0,10.0,15.0
max,80.0,40.0,50.0,50.0,20.0,30.0


****

**What WOMEN think OTHER WOMEN look for:**

***(signup survey)***

In [112]:
df_gender_female[['attr4_1', 'sinc4_1', 'intel4_1', 'fun4_1', 'amb4_1', 'shar4_1']].describe()

Unnamed: 0,attr4_1,sinc4_1,intel4_1,fun4_1,amb4_1,shar4_1
count,3243.0,3243.0,3243.0,3243.0,3243.0,3221.0
mean,24.711378,11.369103,13.109158,14.92877,11.872032,11.159267
std,15.003638,6.410471,6.226482,6.835255,7.429162,5.89139
min,5.0,0.0,0.0,0.0,0.0,0.0
25%,10.0,7.0,9.0,10.0,7.0,8.0
50%,20.0,10.0,10.0,15.0,10.0,10.0
75%,30.0,15.0,16.0,20.0,15.0,15.0
max,80.0,35.0,30.0,45.0,50.0,40.0


***(after dating event):***

In [113]:
df_gender_female[['attr4_2', 'sinc4_2', 'intel4_2', 'fun4_2', 'amb4_2', 'shar4_2']].describe()

Unnamed: 0,attr4_2,sinc4_2,intel4_2,fun4_2,amb4_2,shar4_2
count,2835.0,2835.0,2835.0,2835.0,2835.0,2835.0
mean,24.579541,12.776014,13.051852,14.550265,10.635273,11.714286
std,14.605768,6.298188,5.917015,6.358541,5.760354,5.610964
min,6.0,0.0,2.0,0.0,0.0,0.0
25%,10.0,8.0,9.0,10.0,7.0,8.0
50%,20.0,10.0,10.0,15.0,10.0,10.0
75%,30.0,16.0,17.0,20.0,15.0,15.0
max,85.0,35.0,40.0,40.0,35.0,30.0


**What WOMEN ACTUALLY state they look for:**

***(at signup survey)***

In [114]:
df_gender_female[['attr1_1', 'sinc1_1', 'intel1_1', 'fun1_1', 'amb1_1', 'shar1_1']].describe()

Unnamed: 0,attr1_1,sinc1_1,intel1_1,fun1_1,amb1_1,shar1_1
count,4125.0,4125.0,4125.0,4125.0,4125.0,4103.0
mean,18.055224,18.305008,21.002502,17.147292,12.827222,12.704194
std,9.496919,6.755334,6.778147,5.53211,5.481803,5.796059
min,0.0,0.0,2.0,0.0,0.0,0.0
25%,13.04,15.0,18.0,15.0,10.0,10.0
50%,15.38,20.0,20.0,17.78,15.0,13.21
75%,20.0,20.0,25.0,20.0,16.67,16.0
max,90.0,60.0,50.0,40.0,30.0,30.0


***(after dating event):***

In [115]:
df_gender_female[['attr1_2', 'sinc1_2', 'intel1_2', 'fun1_2', 'amb1_2', 'shar1_2']].describe()

Unnamed: 0,attr1_2,sinc1_2,intel1_2,fun1_2,amb1_2,shar1_2
count,3635.0,3653.0,3653.0,3653.0,3653.0,3653.0
mean,21.890754,16.696773,19.021396,17.463838,11.412737,14.037736
std,11.442558,6.330506,6.097872,5.987142,5.206001,6.683525
min,5.0,0.0,0.0,0.0,0.0,0.0
25%,15.0,14.81,15.0,15.0,10.0,10.0
50%,20.0,17.65,20.0,17.86,10.0,15.0
75%,25.0,20.0,20.0,20.0,15.0,18.0
max,85.0,50.0,40.0,50.0,22.22,35.0


***(during dating event)***

In [117]:
df_gender_female[['attr', 'sinc', 'intel', 'fun', 'amb', 'shar']].describe()

Unnamed: 0,attr,sinc,intel,fun,amb,shar
count,4083.0,4054.0,4037.0,4001.0,3822.0,3608.0
mean,5.9193,7.099778,7.447362,6.280555,6.952773,5.407012
std,2.001754,1.856756,1.609873,2.057324,1.845563,2.214393
min,0.0,0.0,0.0,0.0,0.0,0.0
25%,5.0,6.0,7.0,5.0,6.0,4.0
50%,6.0,7.0,8.0,6.0,7.0,6.0
75%,7.0,8.0,8.0,8.0,8.0,7.0
max,10.0,10.0,10.0,10.0,10.0,10.0


In [120]:
df_gender_male[['attr_o', 'sinc_o', 'intel_o', 'fun_o', 'amb_o', 'shar_o']].describe()

Unnamed: 0,attr_o,sinc_o,intel_o,fun_o,amb_o,shar_o
count,4083.0,4054.0,4037.0,4001.0,3822.0,3608.0
mean,5.919422,7.099778,7.447362,6.280555,6.952773,5.407012
std,2.002019,1.856756,1.609873,2.057324,1.845563,2.214393
min,0.0,0.0,0.0,0.0,0.0,0.0
25%,5.0,6.0,7.0,5.0,6.0,4.0
50%,6.0,7.0,8.0,6.0,7.0,6.0
75%,7.0,8.0,8.0,8.0,8.0,7.0
max,10.5,10.0,10.0,10.0,10.0,10.0


*** (what women said really mattered based on actual dates):***

In [125]:
df_gender_female[['attr7_2', 'sinc7_2', 'intel7_2', 'fun7_2', 'amb7_2', 'shar7_2']].describe()

Unnamed: 0,attr7_2,sinc7_2,intel7_2,fun7_2,amb7_2,shar7_2
count,1027.0,1008.0,1027.0,1027.0,1008.0,1027.0
mean,28.630964,15.263889,15.580331,18.426485,8.132937,13.779942
std,15.645847,7.746852,7.005298,8.219646,5.407541,8.445691
min,10.0,0.0,0.0,0.0,0.0,0.0
25%,18.0,10.0,10.0,10.0,5.0,5.0
50%,25.0,15.0,15.0,18.0,10.0,15.0
75%,40.0,20.0,20.0,25.0,10.0,20.0
max,65.0,40.0,40.0,40.0,20.0,40.0


****

In [164]:
df_female_match_sum = df_gender_female.groupby(['iid'])['match'].sum()
df[df_female_match_sum]


Unnamed: 0,condtn,gender,iid,gender.1,gender.2,...,gender.3,iid.1,iid.2,gender.4,gender.5
0,1,0,1,0,0,...,0,1,1,0,0
1,1,0,1,0,0,...,0,1,1,0,0
2,1,0,1,0,0,...,0,1,1,0,0
3,1,0,1,0,0,...,0,1,1,0,0
4,1,0,1,0,0,...,0,1,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...
8373,2,1,552,1,1,...,1,552,552,1,1
8374,2,1,552,1,1,...,1,552,552,1,1
8375,2,1,552,1,1,...,1,552,552,1,1
8376,2,1,552,1,1,...,1,552,552,1,1


In [167]:
df_female_match_sum

iid
1      4
2      2
3      0
4      2
5      2
      ..
526    2
527    0
528    0
529    2
530    2
Name: match, dtype: int64

In [156]:
## WOMEN  - matches
## info re. women's matches
df_female_match_sum.describe()

count    274.000000
mean       2.518248
std        2.355267
min        0.000000
25%        1.000000
50%        2.000000
75%        3.000000
max       14.000000
Name: match, dtype: float64

In [203]:
## WOMEN  - matches
df_gender_female.groupby(['idg'])['match'].sum().describe()

## info re. women's matches

count    34.000000
mean     20.294118
std      18.306365
min       0.000000
25%       2.250000
50%      19.000000
75%      36.750000
max      55.000000
Name: match, dtype: float64

In [132]:
## MEN - matches

df_gender_male.groupby(['iid'])['match'].sum().describe()

count    277.000000
mean       2.490975
std        2.222438
min        0.000000
25%        1.000000
50%        2.000000
75%        4.000000
max       11.000000
Name: match, dtype: float64

In [145]:
(df_gender_female.groupby(['iid'])['match'].sum()).sum()

## info re. women's matches

690

In [157]:
df_female_match_sum.sum()

690

In [148]:
len(df_gender_female)

4184

In [187]:
df_gender_female.groupby(['iid'])['match'].sum().max()

14

In [134]:
df_gender_female.groupby(['iid'])['dec'].sum().describe()

##females decisions - info re. how many men women said 'yes' to

count    274.000000
mean       5.580292
std        4.005826
min        0.000000
25%        3.000000
50%        5.000000
75%        8.000000
max       21.000000
Name: dec, dtype: float64

In [135]:
df_gender_female.groupby(['iid'])['dec_o'].sum().describe()

##info re. how many men said 'yes' to women

count    274.000000
mean       7.248175
std        4.315881
min        0.000000
25%        4.000000
50%        7.000000
75%       10.000000
max       20.000000
Name: dec_o, dtype: float64

In [190]:
df_male_match_sum = df_gender_male.groupby(['iid'])['match'].sum()

Unnamed: 0,iid,gender,condtn,partner,idg,...,wave,wave.1,condtn.1,gender.1,round
0,1,0,1,1,1,...,1,1,1,0,10
1,1,0,1,2,1,...,1,1,1,0,10
2,1,0,1,3,1,...,1,1,1,0,10
3,1,0,1,4,1,...,1,1,1,0,10
4,1,0,1,5,1,...,1,1,1,0,10
...,...,...,...,...,...,...,...,...,...,...,...
8373,552,1,2,18,44,...,21,21,2,1,22
8374,552,1,2,19,44,...,21,21,2,1,22
8375,552,1,2,20,44,...,21,21,2,1,22
8376,552,1,2,21,44,...,21,21,2,1,22


In [191]:
df_male_match_sum

iid
11     0
12     2
13     4
14     8
15     3
      ..
548    5
549    5
550    4
551    2
552    6
Name: match, dtype: int64

In [192]:
df_male_match_sum.describe()

count    277.000000
mean       2.490975
std        2.222438
min        0.000000
25%        1.000000
50%        2.000000
75%        4.000000
max       11.000000
Name: match, dtype: float64

In [188]:
df_gender_male.groupby(['iid'])['dec'].sum().describe()

##men decisions - info re. how many women men said 'yes' to

count    277.000000
mean       7.180505
std        4.608190
min        0.000000
25%        4.000000
50%        6.000000
75%        9.000000
max       21.000000
Name: dec, dtype: float64

In [189]:
df_gender_male.groupby(['iid'])['dec_o'].sum().describe()

##men decisions - info re. how many women men said 'yes' to men

count    277.000000
mean       5.519856
std        3.757669
min        0.000000
25%        3.000000
50%        5.000000
75%        8.000000
max       18.000000
Name: dec_o, dtype: float64

In [197]:
df[df_male_match_sum.tail()]

Unnamed: 0,wave,wave.1,condtn,gender,round
0,1,1,1,0,10
1,1,1,1,0,10
2,1,1,1,0,10
3,1,1,1,0,10
4,1,1,1,0,10
...,...,...,...,...,...
8373,21,21,2,1,22
8374,21,21,2,1,22
8375,21,21,2,1,22
8376,21,21,2,1,22


In [202]:
df_male_attr_o_ave = df_gender_male.groupby(['iid'])[['attr_o', 'round']].mean()
df_male_attr_o_ave

Unnamed: 0_level_0,attr_o,round
iid,Unnamed: 1_level_1,Unnamed: 2_level_1
11,5.600000,10
12,7.100000,10
13,4.800000,10
14,8.200000,10
15,5.700000,10
...,...,...
548,6.857143,22
549,6.350000,22
550,5.136364,22
551,6.142857,22


In [200]:
df_male_attr_o_ave.describe()

Unnamed: 0,attr_o,round
count,277.0,277.0
mean,5.934091,15.191336
std,1.222255,5.166384
min,2.333333,5.0
25%,5.095238,10.0
50%,6.0,16.0
75%,6.875,20.0
max,8.6,22.0


****

## QUESTION 3:	
### 3.	What do men look for in the opposite sex? Does this differ from women? 

**Question 3B**: *How important do people think attractiveness is in potential mate selection vs. its real impact?*

**Hypothesis**: Hypothesis: men more likely to rate ‘attractiveness’ as more important than women when looking for a mate; women are more likely to rate ‘sincere’ as more important. 


| Variable CODE (subject) | Variable CODE (partner)| Scale | When during Experiment? |Question| 
| :---:| :----:| :------: | :------: |:------|
|  **1_1**| **pf_o_att; pf_o_sha** *rating by partner*| 100pts | Signup| what do you look for in the opposite sex? |
|  **attr; shar**| **attr_o; shar_o** *rating by partner* | 1-10 | During event (after each date)| rating of the person you are dating |
|  **1_s**| | 1-10 scale & 100pts | During Event|what do you look for in the opposite sex?  |
|  **1_2**| | 100pts | After Event| what do you look for in the opposite sex? |
|  **7_2**| | 100pts | After Event| Based on yes/no decisions during speed dating event, distribute points to attributes that best reflect the actual importance of these attributes in your decisions|
| **like** |**like_o** *rating by partner* | 1-10 | During event (after each date)|  How much do you like the person? |
| **prob** |**prob_o** *rating by partner* | 1-10 | During event (after each date)|  How probable do you think it is that this person will say 'yes' for you? |



gender (1=M | 0 =F)

dec_o: 	decision of partner the night of event

dec: decision of interviewee the night of event



In [84]:
y_max = df[ ['attr1_1', 'sinc1_1', 'intel1_1', 'fun1_1', 'amb1_1', 'shar1_1'] ].max(axis = 1)

y_max.loc['impt_atr_other1'] = 'Attractive'
y_max.loc[df.sinc1_1 > y_max, 'impt_atr_other1'] = 'Sincere'
y_max.loc[df.inte1_1 > y_max, 'impt_atr_other1'] = 'Intelligent'
y_max.loc[df.fun1_1 > y_max, 'impt_atr_other1'] = 'Fun'
y_max.loc[df.amb1_1 > y_max, 'impt_atr_other1'] = 'Ambitious'
y_max.loc[df.shar1_1 > y_max, 'impt_atr_other1'] = 'Shared_Interests'

ValueError: Series lengths must match to compare

****

## QUESTION 4:	
### What do women THINK men look for in the opposite sex? What about men? Does it differ from before dating event to after? Does this differ from actual results (Q3)?  

**Hypothesis**: women think men give more weight to attractiveness but both men and women give the most weight to attractiveness vs. other attributes. 


| Variable CODE | Scale | When during Experiment? |Question| 
| :---:| :----:| :------: |:------|
|  **2_1**| 100pts | Signup| What do you think the opposite sex looks for in a date? |
|  **2_2**| 100pts | After Event| What do you think the opposite sex looks for in a date? |


gender (1=M | 0 =F)



#### Look at the mean, max and min for all 6 attributes at signup survey (2_1) compared to after dating event (2_2):

In [85]:
df.attr2_1.describe() 
#signup survey

count    8299.000000
mean       30.362192
std        16.249937
min         0.000000
25%        20.000000
50%        25.000000
75%        40.000000
max       100.000000
Name: attr2_1, dtype: float64

In [86]:
df.attr2_2.describe()
#after dating event

count    5775.000000
mean       29.344369
std        14.551171
min         0.000000
25%        19.150000
50%        25.000000
75%        38.460000
max        85.000000
Name: attr2_2, dtype: float64

#### Look at Males and 2_1 values

In [87]:
df_gender_male = df[df.gender == 1]
df_gender_male

Unnamed: 0,iid,id,gender,idg,condtn,...,attr5_3,sinc5_3,intel5_3,fun5_3,amb5_3
100,11,1.0,1,2,1,...,,,,,
101,11,1.0,1,2,1,...,,,,,
102,11,1.0,1,2,1,...,,,,,
103,11,1.0,1,2,1,...,,,,,
104,11,1.0,1,2,1,...,,,,,
...,...,...,...,...,...,...,...,...,...,...,...
8373,552,22.0,1,44,2,...,9.0,5.0,9.0,5.0,6.0
8374,552,22.0,1,44,2,...,9.0,5.0,9.0,5.0,6.0
8375,552,22.0,1,44,2,...,9.0,5.0,9.0,5.0,6.0
8376,552,22.0,1,44,2,...,9.0,5.0,9.0,5.0,6.0


#### Look at Male's rating of attractivness at 2_1

In [88]:
df_gender_male[['attr2_1']].describe()

Unnamed: 0,attr2_1
count,4174.0
mean,25.092631
std,13.334847
min,0.0
25%,16.67
50%,20.0
75%,30.0
max,95.0


#### Look at Male's rating of attractivness at 2_2

In [89]:
df_gender_male[['attr2_2']].describe()

Unnamed: 0,attr2_2
count,2940.0
mean,25.792765
std,13.65316
min,0.0
25%,16.67
50%,20.0
75%,30.0
max,80.0


#### Look at female's rating of attractivness at 2_1 & 2_2

In [90]:
df_gender_female = df[df.gender == 0]
df_gender_female

Unnamed: 0,iid,id,gender,idg,condtn,...,attr5_3,sinc5_3,intel5_3,fun5_3,amb5_3
0,1,1.0,0,1,1,...,,,,,
1,1,1.0,0,1,1,...,,,,,
2,1,1.0,0,1,1,...,,,,,
3,1,1.0,0,1,1,...,,,,,
4,1,1.0,0,1,1,...,,,,,
...,...,...,...,...,...,...,...,...,...,...,...
7889,530,22.0,0,43,2,...,3.0,8.0,8.0,5.0,5.0
7890,530,22.0,0,43,2,...,3.0,8.0,8.0,5.0,5.0
7891,530,22.0,0,43,2,...,3.0,8.0,8.0,5.0,5.0
7892,530,22.0,0,43,2,...,3.0,8.0,8.0,5.0,5.0


In [91]:
#### Female: 2_1
df_gender_female[['attr2_1']].describe()

Unnamed: 0,attr2_1
count,4125.0
mean,35.694349
std,17.171131
min,10.0
25%,23.26
50%,30.0
75%,50.0
max,100.0


In [92]:
#### Female: 2_2
df_gender_female[['attr2_2']].describe()

Unnamed: 0,attr2_2
count,2835.0
mean,33.027513
std,14.54034
min,10.0
25%,20.83
50%,30.0
75%,40.0
max,85.0


> Observations: Women on average think men give more weight to 'attractivness'

#### Look at Women's ratings for men at 2_1  (what women THINK men look for)

In [93]:
df_gender_female[['attr2_1', 'sinc2_1', 'intel2_1', 'fun2_1', 'amb2_1', 'shar2_1']].describe()

Unnamed: 0,attr2_1,sinc2_1,intel2_1,fun2_1,amb2_1,shar2_1
count,4125.0,4125.0,4125.0,4125.0,4125.0,4125.0
mean,35.694349,11.343646,12.532022,18.73351,9.230638,12.645113
std,17.171131,6.254626,5.135046,6.50548,5.314698,6.130889
min,10.0,0.0,0.0,0.0,0.0,0.0
25%,23.26,5.0,10.0,15.0,5.0,10.0
50%,30.0,10.0,11.36,20.0,10.0,11.9
75%,50.0,15.0,15.0,20.0,13.16,16.67
max,100.0,30.0,30.0,50.0,30.0,30.0


#### What women actually look for in the opposite sex (1_1)

In [94]:
df_gender_male[['attr1_1', 'sinc1_1', 'intel1_1', 'fun1_1', 'amb1_1', 'shar1_1']].describe()

Unnamed: 0,attr1_1,sinc1_1,intel1_1,fun1_1,amb1_1,shar1_1
count,4174.0,4174.0,4174.0,4164.0,4154.0,4154.0
mean,26.921689,16.498436,19.537374,17.763893,8.552829,10.996574
std,13.669663,7.212455,6.709546,6.573848,5.985803,6.7702
min,6.67,0.0,0.0,0.0,0.0,0.0
25%,20.0,10.0,16.67,15.0,5.0,5.0
50%,23.905,17.39,20.0,18.75,10.0,10.0
75%,30.0,20.0,23.08,20.0,12.0,15.34
max,100.0,40.0,42.86,50.0,53.0,30.0


#### Look at Mens's ratings for women at 2_1    (what men THINK women look for)

In [95]:
df_gender_male[['attr2_1', 'sinc2_1', 'intel2_1', 'fun2_1', 'amb2_1', 'shar2_1']].describe()

Unnamed: 0,attr2_1,sinc2_1,intel2_1,fun2_1,amb2_1,shar2_1
count,4174.0,4174.0,4174.0,4174.0,4164.0,4164.0
mean,25.092631,15.181078,16.279633,18.115379,14.234815,11.071924
std,13.334847,7.128021,6.705605,6.635233,7.346384,6.10383
min,0.0,0.0,0.0,0.0,0.0,0.0
25%,16.67,10.0,10.0,15.0,10.0,5.0
50%,20.0,15.0,16.28,19.57,15.0,10.0
75%,30.0,20.0,20.0,20.0,20.0,15.0
max,95.0,50.0,40.0,44.0,50.0,30.0


#### What women actually look for in the opposite sex (1_1)

In [96]:
df_gender_female[['attr1_1', 'sinc1_1', 'intel1_1', 'fun1_1', 'amb1_1', 'shar1_1']].describe()

Unnamed: 0,attr1_1,sinc1_1,intel1_1,fun1_1,amb1_1,shar1_1
count,4125.0,4125.0,4125.0,4125.0,4125.0,4103.0
mean,18.055224,18.305008,21.002502,17.147292,12.827222,12.704194
std,9.496919,6.755334,6.778147,5.53211,5.481803,5.796059
min,0.0,0.0,2.0,0.0,0.0,0.0
25%,13.04,15.0,18.0,15.0,10.0,10.0
50%,15.38,20.0,20.0,17.78,15.0,13.21
75%,20.0,20.0,25.0,20.0,16.67,16.0
max,90.0,60.0,50.0,40.0,30.0,30.0


In [97]:
df.sinc2_1.describe()
#signup survey

count    8299.000000
mean       13.273691
std         6.976775
min         0.000000
25%        10.000000
50%        15.000000
75%        18.750000
max        50.000000
Name: sinc2_1, dtype: float64

In [98]:
df.sinc2_2.describe()
#after dating event

count    5775.00000
mean       13.89823
std         6.17169
min         0.00000
25%        10.00000
50%        15.00000
75%        19.23000
max        40.00000
Name: sinc2_2, dtype: float64

In [99]:
df.intel2_1.describe()
#signup survey

count    8299.000000
mean       14.416891
std         6.263304
min         0.000000
25%        10.000000
50%        15.000000
75%        20.000000
max        40.000000
Name: intel2_1, dtype: float64

In [100]:
df.intel2_2.describe()
#after dating event

count    5775.000000
mean       13.958265
std         5.398621
min         0.000000
25%        10.000000
50%        15.000000
75%        17.390000
max        30.770000
Name: intel2_2, dtype: float64

In [101]:
df.fun2_1.describe()
#signup survey

count    8299.000000
mean       18.422620
std         6.577929
min         0.000000
25%        15.000000
50%        20.000000
75%        20.000000
max        50.000000
Name: fun2_1, dtype: float64

In [102]:
df.fun2_2.describe()
#after dating event

count    5775.000000
mean       17.967233
std         6.100307
min         0.000000
25%        15.000000
50%        18.520000
75%        20.000000
max        40.000000
Name: fun2_2, dtype: float64

In [103]:
df.amb2_1.describe()
#signup survey

count    8289.000000
mean       11.744499
std         6.886532
min         0.000000
25%         6.000000
50%        10.000000
75%        15.000000
max        50.000000
Name: amb2_1, dtype: float64

In [104]:
df.amb2_2.describe()
#after dating event

count    5775.000000
mean       11.909735
std         6.313281
min         0.000000
25%        10.000000
50%        10.000000
75%        15.090000
max        50.000000
Name: amb2_2, dtype: float64

In [105]:
df.shar2_1.describe()
#signup survey

count    8289.000000
mean       11.854817
std         6.167314
min         0.000000
25%        10.000000
50%        10.000000
75%        15.630000
max        30.000000
Name: shar2_1, dtype: float64

In [106]:
df.shar2_2.describe()
#after dating event

count    5775.000000
mean       12.887976
std         5.615691
min         0.000000
25%        10.000000
50%        13.950000
75%        16.515000
max        30.000000
Name: shar2_2, dtype: float64