23/02/2021 - **Preprocessing of labels** - Corentin GARET

# Read data

In [183]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

In [184]:
# Selecting data we will exploit
raw_data = ['extreme_sentiment_results.csv', 'mturk_extra_v2.csv', 'Batch_2980374_batch_results.csv', 'mosi_pom_output.csv', 'pom_extra_sqa_mono_results.csv']

In [185]:
# Load dataset and concatenate different databases (same shape/columns checked before)
df = pd.read_csv('../raw_data/labels/5000_batch_raw.csv')
for dataset in raw_data:
    df_bis = pd.read_csv(f'../raw_data/labels/{dataset}')
    frames = [df, df_bis]
    df = pd.concat(frames)

In [186]:
df.head()

Unnamed: 0,HITId,HITTypeId,Title,Description,Keywords,Reward,CreationTime,MaxAssignments,RequesterAnnotation,AssignmentDurationInSeconds,...,Answer.happiness,Answer.sadness,Answer.secret_word1,Answer.secret_word2,Answer.secret_word3,Answer.sentiment,Answer.surprise,Answer.video_load,Approve,Reject
0,3XUSYT70ITD42GQKT9TDGVDH7MS0DB,310F0WGLWJM86SJMYKLR3YFKF1FLT9,MOSI2 Video Annotation,Watch a short video clip and answer a single s...,sentiment,$0.10,Sat Oct 28 15:33:29 PDT 2017,3,BatchId:2988081;OriginalHitTemplateId:926051491;,900,...,0.0,0.0,{},{},{},-2.0,0.0,0.0,,
1,3XUSYT70ITD42GQKT9TDGVDH7MS0DB,310F0WGLWJM86SJMYKLR3YFKF1FLT9,MOSI2 Video Annotation,Watch a short video clip and answer a single s...,sentiment,$0.10,Sat Oct 28 15:33:29 PDT 2017,3,BatchId:2988081;OriginalHitTemplateId:926051491;,900,...,0.0,1.0,morsel,healer,district,-1.0,1.0,0.0,,
2,3XUSYT70ITD42GQKT9TDGVDH7MS0DB,310F0WGLWJM86SJMYKLR3YFKF1FLT9,MOSI2 Video Annotation,Watch a short video clip and answer a single s...,sentiment,$0.10,Sat Oct 28 15:33:29 PDT 2017,3,BatchId:2988081;OriginalHitTemplateId:926051491;,900,...,0.0,0.0,{},{},{},0.0,0.0,0.0,,
3,3YO4AH2FPDWYRPUPYVZXGUKGQ1GQ05,310F0WGLWJM86SJMYKLR3YFKF1FLT9,MOSI2 Video Annotation,Watch a short video clip and answer a single s...,sentiment,$0.10,Sat Oct 28 15:32:54 PDT 2017,3,BatchId:2988081;OriginalHitTemplateId:926051491;,900,...,0.0,0.0,{},{},{},0.0,0.0,0.0,,
4,3YO4AH2FPDWYRPUPYVZXGUKGQ1GQ05,310F0WGLWJM86SJMYKLR3YFKF1FLT9,MOSI2 Video Annotation,Watch a short video clip and answer a single s...,sentiment,$0.10,Sat Oct 28 15:32:54 PDT 2017,3,BatchId:2988081;OriginalHitTemplateId:926051491;,900,...,1.0,0.0,morsel,healer,district,0.0,0.0,0.0,,


In [4]:
df.shape

(70806, 43)

In [5]:
# Check columns names
df.columns

Index(['HITId', 'HITTypeId', 'Title', 'Description', 'Keywords', 'Reward',
       'CreationTime', 'MaxAssignments', 'RequesterAnnotation',
       'AssignmentDurationInSeconds', 'AutoApprovalDelayInSeconds',
       'Expiration', 'NumberOfSimilarHITs', 'LifetimeInSeconds',
       'AssignmentId', 'WorkerId', 'AssignmentStatus', 'AcceptTime',
       'SubmitTime', 'AutoApprovalTime', 'ApprovalTime', 'RejectionTime',
       'RequesterFeedback', 'WorkTimeInSeconds', 'LifetimeApprovalRate',
       'Last30DaysApprovalRate', 'Last7DaysApprovalRate', 'Input.VIDEO_ID',
       'Input.CLIP', 'Answer.anger', 'Answer.disgust', 'Answer.fear',
       'Answer.gender', 'Answer.happiness', 'Answer.sadness',
       'Answer.secret_word1', 'Answer.secret_word2', 'Answer.secret_word3',
       'Answer.sentiment', 'Answer.surprise', 'Answer.video_load', 'Approve',
       'Reject'],
      dtype='object')

# Data cleaning

In [6]:
# Checking columns with no information
df.isnull().sum()

HITId                              0
HITTypeId                          0
Title                              0
Description                        0
Keywords                           0
Reward                             0
CreationTime                       0
MaxAssignments                     0
RequesterAnnotation                0
AssignmentDurationInSeconds        0
AutoApprovalDelayInSeconds         0
Expiration                         0
NumberOfSimilarHITs            70806
LifetimeInSeconds              70806
AssignmentId                       0
WorkerId                           0
AssignmentStatus                   0
AcceptTime                         0
SubmitTime                         0
AutoApprovalTime                   0
ApprovalTime                   70806
RejectionTime                  70806
RequesterFeedback              70806
WorkTimeInSeconds                  0
LifetimeApprovalRate               0
Last30DaysApprovalRate             0
Last7DaysApprovalRate              0
I

In [187]:
df = df.drop(columns=['NumberOfSimilarHITs', 'LifetimeInSeconds', 'ApprovalTime', 'RejectionTime', 'RequesterFeedback', 'Approve', 'Reject'])

In [8]:
df.shape

(70806, 36)

In [10]:
# Checking values that will be important in constructing y + linking with X
for i in list(df.columns):
    print(f'{i} : {df[i].unique()}')

HITId : ['3XUSYT70ITD42GQKT9TDGVDH7MS0DB' '3YO4AH2FPDWYRPUPYVZXGUKGQ1GQ05'
 '3YGYP13641L1OM1WCGQVWWQYUGTRNI' ... '3ZZAYRN1I63WMME0HKV3B0RL97FTOP'
 '3ZZAYRN1I63WMME0HKV3B0RL97GOTL' '3ZZAYRN1I63WMME0HKV3B0RL97GTOQ']
HITTypeId : ['310F0WGLWJM86SJMYKLR3YFKF1FLT9' '3PFYYPJUJWHYRI6DU6S4AQHSUXDZ2H']
Title : ['MOSI2 Video Annotation']
Description : ['Watch a short video clip and answer a single sentiment question']
Keywords : ['sentiment']
Reward : ['$0.10' '$0.10 ']
CreationTime : ['Sat Oct 28 15:33:29 PDT 2017' 'Sat Oct 28 15:32:54 PDT 2017'
 'Sat Oct 28 15:33:12 PDT 2017' 'Sat Oct 28 15:32:56 PDT 2017'
 'Sat Oct 28 15:33:09 PDT 2017' 'Sat Oct 28 15:33:24 PDT 2017'
 'Sat Oct 28 15:33:08 PDT 2017' 'Sat Oct 28 15:32:55 PDT 2017'
 'Sat Oct 28 15:33:23 PDT 2017' 'Sat Oct 28 15:32:57 PDT 2017'
 'Sat Oct 28 15:33:14 PDT 2017' 'Sat Oct 28 15:33:13 PDT 2017'
 'Sat Oct 28 15:33:30 PDT 2017' 'Sat Oct 28 15:33:26 PDT 2017'
 'Sat Oct 28 15:33:10 PDT 2017' 'Sat Oct 28 15:32:58 PDT 2017'
 'Sat Oct 28 15:3

# Creation of y1 : Sentiment rating taking Answer.sentiment

## Creation and cleaning of y1

In [188]:
# Checking that each clip has been rated 3 times
df['MaxAssignments'].unique()

array([3])

In [189]:
# Selecting columns that are of interest for y1 : ID + Sentiment
int_columns = ['AssignmentId', 'WorkerId', 'Input.VIDEO_ID', 'Input.CLIP', 'Answer.sentiment']

In [190]:
y1 = df[int_columns]
y1.shape

(70806, 5)

In [191]:
# Looking at sentiment answers that are NaN
y1[y1['Answer.sentiment'].isnull()]

Unnamed: 0,AssignmentId,WorkerId,Input.VIDEO_ID,Input.CLIP,Answer.sentiment
2866,3FTF2T8WLSUOU0BI02MYPDYF0CN9W3,A3R79CYQ817AV3,ZtuTCuh9C1M,29,
3018,3A7Y0R2P2P0SPCTHWJ9XGQQP5J8JX4,A3R79CYQ817AV3,cMUS4nhcKCQ,21,
3614,3VNL7UK1XGV53TY18JHFCPDRZG7TFW,A3R79CYQ817AV3,EGA6iulTr00,7,
3788,3Y5140Z9DYSRLS7KSAEGACPMQ5OPIO,A3R79CYQ817AV3,jHB9kiH6Vas,3,
3871,3QL2OFSM97U05497KL52VKMWDMFNC2,A3R79CYQ817AV3,cSrM5mHACmA,11,
4014,39ZSFO5CA98088ZMREYAWJRLB1EUJA,A3R79CYQ817AV3,3aIQUQgawaI,21,
4184,3NPI0JQDAPHHU6NVMMDKOJRV1NLTPS,A3R79CYQ817AV3,mRnEJOLkhp8,30,
4207,3HFNH7HEMIQYPD242ZT2P52GGOQQGW,A3R79CYQ817AV3,IRY4D_-mx3Q,7,
4261,3CTOC39K382YAXNQE65ZMRD13QFJ7V,A3R79CYQ817AV3,JATMzuV6sUE,4,
4488,3MD9PLUKKJQDDXKHCZPA6INHM48ZNK,A3R79CYQ817AV3,rLNY4-FWt5c,5,


In [192]:
# Removing sentiment anwers that are NaN
y1 = y1[~y1['Answer.sentiment'].isnull()]

In [193]:
y1.shape

(70763, 5)

In [194]:
y1.head()

Unnamed: 0,AssignmentId,WorkerId,Input.VIDEO_ID,Input.CLIP,Answer.sentiment
0,37TRT2X24R3LC9QGRNHFRTZXMEBBJA,A2WNW8A4MOR7T7,xSCvspXYU9k,5,-2.0
1,3DL65MZB8ERWBYWURM2J5XSNZRKCE2,A3CAF7LTD3ORSG,xSCvspXYU9k,5,-1.0
2,3ZOTGHDK5JNYU6YFIMVFPEQJPVKSOM,A2R0YYUAWNT7UD,xSCvspXYU9k,5,0.0
3,33IZTU6J82DLOFC7JDIB8RHC1Y7SX7,A2R0YYUAWNT7UD,_UNQDdiAbWI,0,0.0
4,3ATPCQ38J9M6OOGN79IM65UFHYTAYY,A1AZAC9CPBEP6K,_UNQDdiAbWI,0,0.0


## Creation of the foreign key Segment_ID

In [195]:
y1.dtypes

AssignmentId         object
WorkerId             object
Input.VIDEO_ID       object
Input.CLIP            int64
Answer.sentiment    float64
dtype: object

In [196]:
# Converting Input.VIDEO_ID to str
y1['Input.VIDEO_ID'] = y1['Input.VIDEO_ID'].astype(str)

In [197]:
# Converting Input.CLIP to str
y1['Input.CLIP'] = y1['Input.CLIP'].astype(str)

In [198]:
# Creating a column that compiles InputVIDEO ID + Input.CLIP : this column will become the foreign key linking with X
y1["Segment_ID"] = y1["Input.VIDEO_ID"] + y1["Input.CLIP"]

In [199]:
y1.shape

(70763, 6)

In [200]:
len(y1["Segment_ID"].unique())

23518

## Creation of y1 taking the mean for each segment

In [201]:
y1 = y1.groupby('Segment_ID').mean()
y1.head()

Unnamed: 0_level_0,Answer.sentiment
Segment_ID,Unnamed: 1_level_1
--qXJuDtHPw5,1.0
-3g5yACwYnA10,1.0
-3g5yACwYnA13,0.666667
-3g5yACwYnA2,0.0
-3g5yACwYnA3,0.0


In [202]:
y1.reset_index(inplace=True)

In [252]:
y1.to_csv('y1_sentiment.csv', index=False)

In [206]:
y1_IDs = y1['Segment_ID']

In [209]:
y1_IDs.to_csv('Segment_ID_labels.csv', index=False)

# Creation of y2 : 6-dimensional emotional target

## Creation and cleaning of y2

In [30]:
df.head(2)

Unnamed: 0,HITId,HITTypeId,Title,Description,Keywords,Reward,CreationTime,MaxAssignments,RequesterAnnotation,AssignmentDurationInSeconds,...,Answer.fear,Answer.gender,Answer.happiness,Answer.sadness,Answer.secret_word1,Answer.secret_word2,Answer.secret_word3,Answer.sentiment,Answer.surprise,Answer.video_load
0,3XUSYT70ITD42GQKT9TDGVDH7MS0DB,310F0WGLWJM86SJMYKLR3YFKF1FLT9,MOSI2 Video Annotation,Watch a short video clip and answer a single s...,sentiment,$0.10,Sat Oct 28 15:33:29 PDT 2017,3,BatchId:2988081;OriginalHitTemplateId:926051491;,900,...,0.0,0.0,0.0,0.0,{},{},{},-2.0,0.0,0.0
1,3XUSYT70ITD42GQKT9TDGVDH7MS0DB,310F0WGLWJM86SJMYKLR3YFKF1FLT9,MOSI2 Video Annotation,Watch a short video clip and answer a single s...,sentiment,$0.10,Sat Oct 28 15:33:29 PDT 2017,3,BatchId:2988081;OriginalHitTemplateId:926051491;,900,...,0.0,0.0,0.0,1.0,morsel,healer,district,-1.0,1.0,0.0


In [31]:
df.columns

Index(['HITId', 'HITTypeId', 'Title', 'Description', 'Keywords', 'Reward',
       'CreationTime', 'MaxAssignments', 'RequesterAnnotation',
       'AssignmentDurationInSeconds', 'AutoApprovalDelayInSeconds',
       'Expiration', 'AssignmentId', 'WorkerId', 'AssignmentStatus',
       'AcceptTime', 'SubmitTime', 'AutoApprovalTime', 'WorkTimeInSeconds',
       'LifetimeApprovalRate', 'Last30DaysApprovalRate',
       'Last7DaysApprovalRate', 'Input.VIDEO_ID', 'Input.CLIP', 'Answer.anger',
       'Answer.disgust', 'Answer.fear', 'Answer.gender', 'Answer.happiness',
       'Answer.sadness', 'Answer.secret_word1', 'Answer.secret_word2',
       'Answer.secret_word3', 'Answer.sentiment', 'Answer.surprise',
       'Answer.video_load'],
      dtype='object')

In [33]:
# Selecting columns that are of interest for y2 : IDs + Emotions columns
int_columns2 = ['AssignmentId', 'WorkerId', 'Input.VIDEO_ID', 'Input.CLIP', 'Answer.anger',
       'Answer.disgust', 'Answer.fear', 'Answer.happiness',
       'Answer.sadness', 'Answer.surprise']

In [37]:
y2 = df[int_columns2]
y2.shape

(70806, 10)

In [38]:
# Checking NaN in results
y2.isnull().sum()

AssignmentId         0
WorkerId             0
Input.VIDEO_ID       0
Input.CLIP           0
Answer.anger        47
Answer.disgust      49
Answer.fear         47
Answer.happiness    47
Answer.sadness      48
Answer.surprise     53
dtype: int64

In [43]:
# There are 77 rows that have at least one NaN in the answers. Looking at the data, I chose to delete them.
y2[y2.isnull().any(1)].shape

(77, 10)

In [46]:
# Selecting rows without any NaN
y2 = y2[~y2.isnull().any(1)]

In [49]:
y2.shape

(70729, 10)

## Creation of foreign key Segment_ID

In [50]:
y2.dtypes

AssignmentId         object
WorkerId             object
Input.VIDEO_ID       object
Input.CLIP            int64
Answer.anger        float64
Answer.disgust      float64
Answer.fear         float64
Answer.happiness    float64
Answer.sadness      float64
Answer.surprise     float64
dtype: object

In [52]:
# Converting Input.VIDEO_ID & Input.CLIP to str
y2['Input.VIDEO_ID'] = y2['Input.VIDEO_ID'].astype(str)
y2['Input.CLIP'] = y2['Input.CLIP'].astype(str)

In [53]:
# Creating a column that compiles InputVIDEO ID + Input.CLIP : this column will become the foreign key linking with X
y2["Segment_ID"] = y2["Input.VIDEO_ID"] + y2["Input.CLIP"]

In [54]:
y2.shape

(70729, 11)

In [55]:
len(y2["Segment_ID"].unique())

23518

## Quick exploration of y2

In [87]:
y2.reset_index(inplace=True)

In [90]:
y2.shape

(70729, 13)

In [89]:
y2.tail(10)

Unnamed: 0,index,AssignmentId,WorkerId,Input.VIDEO_ID,Input.CLIP,Answer.anger,Answer.disgust,Answer.fear,Answer.happiness,Answer.sadness,Answer.surprise,Segment_ID,ann_emotions
70719,19628,3TYCR1GOTDVNPXHW0M9VTQJSJFCZL0,A2QAIIROYI52KO,pom_extra/216007,16,0.0,0.0,0.0,0.0,0.0,0.0,pom_extra/21600716,0
70720,19629,3GS6S824SR99J2FC1C537ETZ78DWNE,A2R0YYUAWNT7UD,sqa_mosi/32j1yMF37hA,1,0.0,0.0,0.0,0.0,0.0,0.0,sqa_mosi/32j1yMF37hA1,0
70721,19630,3LEIZ60CDKBSOVGNBP65OHSXJL3Z9B,A3GLUDQZGEJL5G,sqa_mosi/32j1yMF37hA,1,0.0,0.0,0.0,0.0,0.0,0.0,sqa_mosi/32j1yMF37hA1,0
70722,19631,3OHYZ19UGDHUUACL1M9O4EWD5ZIAOZ,A1U0GNE5ELW5V7,sqa_mosi/32j1yMF37hA,1,0.0,0.0,0.0,1.0,0.0,0.0,sqa_mosi/32j1yMF37hA1,1
70723,19632,3B3WTRP3DCE2IKDBMIXR16IQYCA29C,A1ET2J1PIP0RGO,sqa_mosi/DbAppk7xT0Y,7,0.0,0.0,0.0,0.0,0.0,0.0,sqa_mosi/DbAppk7xT0Y7,0
70724,19633,3WLEIWSYHPTVXQVUKULJENGEHPGH2F,A1FHS282JP487T,sqa_mosi/DbAppk7xT0Y,7,0.0,0.0,0.0,0.0,0.0,0.0,sqa_mosi/DbAppk7xT0Y7,0
70725,19634,3ZAZR5XV02UUMTNQN9GWQ7T5P6AZCW,A2R0YYUAWNT7UD,sqa_mosi/DbAppk7xT0Y,7,0.0,0.0,0.0,1.0,0.0,0.0,sqa_mosi/DbAppk7xT0Y7,1
70726,19635,37TD41K0AILXLH1CSHMOUEW22RASCK,A1ET2J1PIP0RGO,sqa_mosi/iREkcXde5ds,4,0.0,0.0,0.0,0.0,0.0,0.0,sqa_mosi/iREkcXde5ds4,0
70727,19636,38F71OA9GU81QY8HZMG3QBIZ7FPFMB,A1FHS282JP487T,sqa_mosi/iREkcXde5ds,4,0.0,0.0,0.0,0.0,0.0,0.0,sqa_mosi/iREkcXde5ds4,0
70728,19637,3HWRJOOET6ECIFLQO8SJK2W5N3NSED,A394S7JSNUA3TS,sqa_mosi/iREkcXde5ds,4,0.0,0.0,0.0,0.0,0.0,0.0,sqa_mosi/iREkcXde5ds4,0


In [73]:
emotions = ['Answer.anger', 'Answer.disgust', 'Answer.sadness', 'Answer.fear', 'Answer.happiness', 'Answer.surprise']

In [61]:
for i in emotions:
    print(f'{i}: {round(y2[i].mean(),2)}')

Answer.anger: 0.15
Answer.disgust: 0.12
Answer.fear: 0.04
Answer.happiness: 0.47
Answer.surprise: 0.05


In [91]:
y2["ann_emotions"] = ""
for index, row in y2.iterrows():
    compt = 0
    for i in emotions:
        if row[i] > 0:
            compt+=1
    y2['ann_emotions'].iloc[index] = compt
#     row['ann_emotions'] = compt

1
2
0
0
1
2
1
1
2
1
2
0
4
1
1
1
1
1
0
0
1
1
0
1
1
2
1
1
1
0
0
0
1
0
1
0
0
1
1
0
0
1
1
1
0
1
0
1
0
1
2
1
0
1
2
0
1
1
0
1
0
0
1
0
0
0
1
1
0
1
0
1
0
1
1
1
1
1
4
2
0
1
2
1
1
0
0
2
1
1
2
1
0
1
0
1
1
0
1
1
0
1
1
1
0
2
1
0
1
1
0
2
1
0
0
0
1
0
1
0
0
1
1
1
0
2
1
0
2
0
0
1
1
1
0
0
1
0
2
0
0
0
1
0
1
1
1
1
1
1
0
2
0
1
0
0
1
2
0
0
1
1
1
0
0
2
0
2
2
0
0
1
0
0
0
0
1
0
1
1
3
1
4
1
1
1
0
1
1
0
0
0
0
1
1
0
1
1


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)


1
1
1
0
0
0
1
0
1
2
0
0
0
0
1
2
1
1
2
1
1
0
2
1
0
2
0
1
1
0
0
1
1
0
0
0
2
2
1
0
1
1
0
1
1
1
2
0
0
0
1
1
1
0
2
0
0
2
0
0
0
1
1
1
1
1
1
0
1
2
0
0
1
1
0
0
0
0
0
0
1
1
0
1
1
1
1
0
0
2
0
2
0
2
0
0
0
0
1
1
0
2
1
1
0
1
0
0
1
1
0
2
4
2
0
1
0
2
1
0
0
0
0
0
1
1
0
2
1
0
1
2
1
1
1
0
1
2
0
0
0
2
1
0
1
0
1
0
0
2
1
1
0
0
0
0
3
0
1
3
2
2
0
1
1
3
0
1
0
1
0
1
0
1
2
0
0
0
1
1
1
1
0
0
0
2
0
0
0
1
0
1
1
1
1
1
1
0
1
2
1
2
1
0
2
1
0
0
0
1
2
3
0
2
0
3
1
2
0
2
0
1
0
1
0
1
1
1
0
0
0
2
0
0
1
0
1
1
0
2
2
0
1
0
0
1
0
0
1
2
0
0
0
0
1
0
0
1
2
0
0
0
2
0
1
0
1
1
2
1
1
0
0
0
1
1
2
1
2
1
1
1
0
2
0
1
0
1
0
1
0
0
1
0
1
1
1
0
1
1
2
1
1
2
0
0
0
1
0
1
0
1
1
0
1
1
1
1
0
1
0
1
3
1
1
0
0
1
2
0
1
0
1
1
1
0
0
0
1
0
1
0
0
1
1
1
0
1
1
0
0
1
1
1
1
1
1
0
0
0
2
0
1
0
0
2
0
1
1
1
0
3
0
0
1
0
2
0
2
0
1
1
1
1
1
1
1
1
0
2
0
2
1
1
1
2
0
1
1
1
1
0
1
0
1
1
0
0
0
1
0
0
0
0
0
0
1
0
1
3
2
1
2
1
1
0
0
0
0
0
1
0
1
1
1
1
1
2
1
2
1
1
0
1
1
1
1
0
2
0
0
1
3
1
1
0
0
2
0
0
0
1
0
0
0
0
1
0
0
0
0
0
2
0
2
0
1
1
0
1
1
0
0
1
1
2
3
0
0
0
0
1
0
1
1
0
1
2
1
1


2
0
0
1
1
0
0
0
1
0
1
1
1
0
1
1
2
0
0
0
0
0
1
1
0
3
2
2
0
1
1
0
1
0
1
0
1
0
1
1
1
2
0
0
0
0
1
1
0
0
1
2
0
1
1
2
1
0
0
1
1
1
0
1
0
0
1
1
1
1
1
1
1
0
0
1
0
0
0
0
1
2
1
1
1
1
1
0
1
0
1
1
1
1
0
0
1
0
0
0
1
1
1
0
1
2
1
1
1
0
0
0
1
0
0
0
0
1
1
1
1
3
0
1
0
0
1
0
0
1
0
2
0
1
0
1
1
1
1
0
0
0
2
1
1
0
0
0
1
0
1
0
0
1
1
0
0
0
2
1
0
0
0
1
1
1
0
0
1
0
1
1
0
2
1
0
0
0
0
0
0
1
0
1
2
0
0
0
0
1
1
1
1
1
1
0
1
0
1
1
1
1
0
1
0
2
0
1
0
1
1
1
1
0
0
0
2
0
1
1
0
3
1
1
0
0
0
0
0
0
1
4
1
0
0
0
1
0
1
1
0
0
0
1
2
0
0
0
0
0
1
0
1
0
0
3
0
0
0
0
0
1
0
0
0
1
1
2
3
0
1
0
1
1
1
0
0
0
1
1
1
0
0
1
2
0
0
1
1
0
0
0
0
0
1
0
1
1
1
1
0
0
0
0
0
1
1
2
1
0
1
1
2
0
2
1
1
1
1
0
0
0
1
1
1
2
0
0
0
0
0
1
0
0
0
1
0
0
1
0
0
1
1
1
0
1
1
2
1
2
2
1
1
0
0
1
0
1
1
1
0
1
1
4
2
1
0
0
0
1
3
1
1
1
0
0
0
1
1
2
0
1
1
1
2
1
0
1
0
1
2
1
0
0
0
1
1
1
0
0
0
0
0
0
1
2
1
1
0
1
0
0
0
0
2
1
1
0
0
0
0
0
0
0
0
0
1
1
0
0
0
0
1
0
2
0
0
0
0
0
0
0
1
1
1
0
0
1
1
1
1
0
0
1
1
1
0
0
0
0
1
1
0
0
1
2
1
1
0
1
0
1
1
1
1
0
0
1
0
0
2
0
0
0
1
1
1
0
0
0
0
0
0
1
1
0
1
1
2
0


0
0
0
1
0
1
0
1
1
0
1
1
0
0
2
0
1
1
2
0
0
0
2
1
1
0
0
3
2
0
0
2
2
1
0
0
2
0
1
0
0
0
2
0
0
1
2
0
1
1
1
0
1
2
1
1
1
1
1
1
1
0
0
0
1
1
0
0
0
0
1
0
0
0
0
2
0
1
0
0
0
0
0
0
0
1
1
1
0
1
1
0
0
0
1
2
1
1
1
1
0
2
1
1
1
0
1
1
3
2
1
0
0
0
0
2
0
1
3
1
1
1
0
1
0
0
0
1
0
0
1
1
1
0
0
0
2
1
0
1
1
0
0
0
0
0
0
0
1
0
0
0
2
0
1
0
1
1
4
0
1
0
0
2
1
1
2
1
0
0
0
0
1
0
0
0
0
0
1
3
0
0
0
1
1
1
0
1
1
0
0
2
0
2
4
0
0
0
1
1
1
1
0
1
1
1
1
1
1
1
1
1
0
0
0
0
1
1
1
0
0
1
0
1
1
1
1
1
0
0
0
2
0
1
2
0
1
0
0
1
3
2
3
1
2
0
0
0
0
1
1
1
1
1
1
0
0
0
0
1
0
0
0
2
0
0
1
1
1
0
2
1
1
1
1
0
0
0
1
0
0
3
0
1
0
0
1
0
1
1
0
1
1
1
1
0
1
2
0
0
0
1
0
0
0
0
1
1
0
1
1
0
1
2
1
1
1
2
1
2
0
0
1
2
1
0
1
1
1
1
2
1
1
1
0
1
0
0
1
0
2
1
1
1
0
1
0
0
0
0
0
1
0
0
0
0
2
0
2
0
0
2
1
0
0
0
1
1
0
0
1
1
1
0
2
0
0
1
1
0
0
0
0
0
0
1
3
0
0
1
0
0
0
2
0
1
0
1
0
1
1
0
0
0
0
0
1
1
1
0
0
0
0
0
2
1
0
1
0
0
0
0
1
1
0
1
1
3
0
0
1
1
0
2
0
1
0
0
0
0
0
0
0
0
0
0
0
0
2
1
0
2
1
1
0
1
2
0
0
1
0
1
1
0
2
1
1
1
1
0
1
1
1
1
1
0
0
0
0
1
0
0
0
0
0
0
0
1
2
3
0
0
2
0
0
0
1
2
0
1


0
0
0
1
2
1
4
2
0
0
0
0
0
1
1
0
0
0
1
1
0
0
0
0
0
2
0
1
0
1
0
0
1
2
1
0
0
0
0
1
1
1
0
1
0
3
0
0
0
1
1
1
0
1
0
0
0
0
0
1
0
0
0
1
1
0
0
0
0
1
2
0
1
1
1
2
0
0
1
1
1
0
1
2
1
2
0
1
0
0
0
1
1
0
2
1
0
0
0
1
1
1
0
0
3
0
1
0
2
2
0
0
0
1
0
0
0
2
2
1
0
1
0
0
0
0
1
0
0
0
0
0
0
1
0
1
0
1
0
0
2
2
0
1
1
0
1
0
1
1
1
1
0
0
0
0
1
1
0
1
1
1
0
0
0
1
1
1
0
1
0
0
0
0
2
1
0
2
1
0
0
1
0
0
2
0
1
1
1
0
2
0
1
1
2
1
1
1
1
1
0
1
1
1
0
2
0
1
1
1
0
1
2
1
1
1
1
1
1
0
0
0
1
2
0
1
1
1
1
1
1
1
2
1
0
0
0
0
0
1
0
1
0
2
1
1
0
0
1
2
0
1
0
2
1
1
1
1
1
1
1
1
0
0
0
0
0
0
1
0
0
0
0
2
1
0
1
0
2
1
1
1
0
0
0
2
1
0
0
0
0
0
0
0
0
0
0
1
0
1
1
1
0
3
5
4
1
1
1
0
0
1
0
0
2
0
0
3
1
1
1
1
1
2
2
0
1
0
2
0
1
2
1
2
0
0
2
1
1
1
1
2
1
0
1
0
1
1
0
1
0
0
1
0
1
1
2
1
2
1
1
0
2
0
0
0
0
0
0
2
1
2
2
1
1
1
1
1
4
0
0
0
0
2
0
0
0
1
1
1
1
0
0
0
1
1
1
2
0
1
0
0
1
1
1
1
1
1
1
1
0
1
1
0
1
1
1
1
1
0
3
0
1
3
1
0
2
0
0
0
0
1
0
1
1
2
1
1
0
2
1
0
0
0
0
1
1
1
2
1
1
1
0
2
0
1
0
0
1
1
1
1
1
0
1
0
0
1
1
1
0
0
2
1
1
2
0
1
1
0
0
0
1
0
2
0
1
1
2
1
1
1
0
0
1
0
0
2
2
2


2
0
0
2
0
0
0
0
0
0
2
0
0
3
1
1
1
0
1
0
2
1
1
1
0
1
1
0
1
0
0
0
1
0
0
1
0
0
1
0
0
1
2
1
2
1
0
0
0
1
0
1
1
0
0
1
0
0
0
0
1
0
1
1
1
1
0
1
1
1
1
0
1
0
1
0
1
1
0
1
1
0
1
0
1
0
0
1
1
2
0
0
0
0
0
1
1
0
2
2
0
1
1
1
0
0
0
0
0
0
0
0
1
0
1
1
0
2
0
0
0
1
1
0
1
0
0
2
2
1
0
0
0
0
1
2
1
0
4
2
0
1
1
0
1
0
1
0
1
3
2
0
1
1
1
0
1
1
1
0
1
0
0
0
0
0
0
2
2
2
2
1
1
0
0
0
0
2
0
1
0
0
2
1
1
1
0
0
0
2
1
0
1
0
2
0
0
0
0
0
1
0
0
0
2
0
0
0
0
1
1
1
1
1
0
0
0
0
0
1
1
0
1
1
0
1
0
0
1
0
1
1
0
0
0
0
1
0
0
1
0
1
0
0
2
0
1
0
1
1
1
0
0
1
0
2
0
0
1
0
1
1
1
0
0
0
1
0
0
0
0
0
0
0
1
1
0
0
1
1
1
0
1
0
1
1
1
0
1
1
1
1
1
1
0
1
0
0
0
2
0
1
0
2
0
0
1
0
0
0
1
0
1
1
0
2
0
1
0
1
1
0
1
0
0
1
1
0
1
1
1
1
2
2
1
0
1
0
0
0
2
0
1
0
1
2
1
0
1
0
1
1
0
0
1
1
1
0
0
0
0
0
0
0
2
1
0
1
0
2
0
1
0
0
1
0
2
2
0
0
1
1
1
0
0
0
0
1
1
1
0
1
0
1
0
1
1
1
1
1
0
1
0
0
0
0
0
1
1
0
1
0
0
2
0
1
0
0
2
0
0
1
0
1
0
0
1
1
0
0
0
0
1
0
1
1
1
1
1
0
0
0
0
0
2
1
1
1
1
1
1
1
0
1
1
0
1
0
0
0
0
0
0
0
2
0
1
0
1
0
0
0
0
0
1
0
1
0
1
0
1
0
1
1
0
0
0
0
0
0
0
1
1
1
1
1
1
0
0
1


1
1
1
1
0
0
1
0
1
1
0
0
0
0
0
0
0
1
1
2
1
1
0
1
1
0
1
1
2
0
0
2
0
0
1
1
1
0
0
1
1
2
1
1
0
1
0
0
0
0
2
1
0
0
0
1
1
0
0
0
0
1
2
1
0
2
0
1
0
1
2
1
1
0
2
1
0
1
1
0
0
0
1
1
1
2
4
2
1
1
1
0
1
1
0
0
1
0
0
0
1
1
1
1
1
1
0
1
0
1
1
1
0
0
0
0
1
0
1
1
2
0
1
1
0
0
0
0
3
0
0
0
2
1
0
1
1
0
1
0
0
1
1
0
0
1
0
0
1
0
0
2
1
1
1
1
1
1
0
0
0
0
0
1
0
0
0
0
1
1
1
0
2
0
0
1
0
1
0
1
0
1
1
0
0
1
1
1
1
0
0
0
0
1
0
1
1
0
1
2
0
1
1
1
1
1
0
0
1
2
1
0
0
1
0
0
2
0
1
1
0
0
2
0
0
1
1
0
1
1
0
1
1
0
1
1
1
0
1
1
1
0
1
0
2
1
0
1
1
1
1
1
1
1
1
1
1
1
0
1
0
1
0
0
1
1
0
1
0
0
0
2
1
1
1
0
1
2
2
0
0
1
1
1
1
1
1
1
1
1
1
2
1
1
1
0
0
1
1
0
1
1
0
1
0
1
0
0
1
1
2
0
2
1
0
1
0
1
1
1
0
0
0
1
1
1
1
0
1
1
0
1
0
2
0
0
2
3
1
2
0
1
0
1
1
1
1
1
1
0
1
1
1
1
1
1
0
0
0
1
0
1
1
0
1
0
0
0
0
2
1
1
1
0
2
1
0
1
1
1
1
1
1
0
1
0
1
0
1
1
1
0
1
0
1
1
1
1
1
1
1
1
0
1
2
1
1
1
1
1
1
1
1
1
0
0
0
0
1
1
1
1
1
0
1
0
1
1
1
1
1
1
2
1
1
1
1
0
0
1
1
1
1
1
1
0
0
1
1
1
1
1
0
1
1
0
1
1
1
1
1
1
0
1
1
1
0
0
0
0
0
0
0
1
2
0
1
0
1
1
1
0
0
0
1
1
0
1
1
2
1
0
1
1
1
0
1
1
1
0


1
0
1
0
1
0
1
1
0
1
0
1
0
0
1
1
1
0
0
1
0
0
0
0
0
0
0
1
0
1
1
0
1
0
1
1
0
0
1
0
0
0
1
0
0
0
1
0
1
0
0
1
1
1
1
1
1
0
0
0
1
0
0
1
1
0
0
0
0
1
1
0
1
1
0
0
0
0
1
0
1
1
1
0
1
0
1
1
1
1
0
1
0
1
1
0
0
0
0
1
0
1
1
1
1
1
1
0
1
0
0
0
1
2
0
2
1
1
0
0
0
0
1
0
1
0
1
0
1
1
1
0
0
2
0
0
1
1
0
1
1
1
0
0
1
0
1
1
0
0
1
1
1
1
1
1
1
0
1
1
1
1
1
1
1
1
1
0
0
1
1
1
1
1
0
0
1
1
2
0
0
1
1
0
1
0
0
1
1
1
1
2
1
0
0
0
0
0
1
0
2
0
0
0
0
0
1
2
1
0
1
1
0
0
1
0
0
0
2
0
0
1
0
1
1
1
1
1
1
1
0
1
0
0
1
0
1
1
1
1
1
0
1
1
0
1
1
1
0
0
0
1
1
1
1
0
2
0
0
0
1
1
1
1
1
1
1
1
1
2
0
0
0
0
0
1
1
1
0
1
1
0
1
0
0
1
1
1
1
0
1
1
0
0
1
1
1
1
1
0
1
1
1
0
1
1
1
2
1
0
0
0
1
0
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
0
1
1
0
1
1
2
1
1
1
1
1
1
1
0
2
1
1
1
0
1
0
1
0
0
1
1
1
1
1
2
1
1
2
1
1
1
1
0
0
1
2
0
1
1
2
0
1
0
1
1
1
1
1
1
1
0
1
1
1
1
0
0
0
1
1
1
1
1
1
1
0
0
0
0
1
0
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
0
0
1
1
1
1
1
1
0
1
1
1
1
1
1
1
1
0
0
0
0
1
1
0
1
1
1
2
0
1
1
0
0
1
1
0
0
1
1
1
1
1
0
0
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
0
1
0
1
2
1
1
1
1
2
1
0
1
1
1
1
1
0


0
0
1
0
0
0
0
0
0
0
0
0
0
1
1
0
0
1
0
0
1
1
0
1
0
1
0
0
0
0
0
0
0
0
0
0
0
1
0
0
0
0
0
0
0
0
0
0
1
0
0
0
0
0
0
0
0
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
1
0
0
0
0
0
0
0
0
0
0
0
0
0
1
0
0
0
0
1
0
0
0
1
0
0
0
0
0
0
0
0
0
0
0
0
1
0
0
0
0
0
0
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
1
1
0
1
0
0
0
0
0
0
1
0
0
0
0
0
0
1
0
0
0
0
0
0
0
0
0
0
1
0
0
0
1
1
1
1
0
0
1
0
1
0
0
0
1
1
0
0
1
1
1
0
0
0
1
0
1
0
0
0
0
0
0
0
0
1
1
0
1
1
1
1
1
1
1
1
1
1
1
0
0
1
0
1
1
1
0
0
0
2
0
0
0
1
1
0
1
1
1
0
0
1
2
1
1
0
2
1
0
1
1
1
1
1
2
2
2
1
1
0
1
1
1
1
2
0
0
1
0
0
1
1
1
1
2
1
0
1
0
3
0
0
1
0
1
0
0
1
0
0
0
1
3
1
1
0
0
2
0
2
1
0
0
0
1
0
0
2
1
2
2
1
0
0
0
0
1
1
0
0
0
0
1
1
0
0
1
1
0
1
2
2
1
1
0
1
1
2
2
1
0
1
0
0
1
1
1
1
1
0
0
0
1
1
2
0
4
1
2
0
1
0
1
1
1
1
0
1
0
0
1
1
1
0
0
1
1
1
1
1
1
1
1
0
1
1
0
1
0
1
0
0
1
0
0
0
0
0
1
0
1
0
1
1
1
1
0
0
0
0
1
0
0
2
1
1
0
0
0
0
0
1
1
1
1
0
0
0
0
0
0
0
1
0
1
1
1
0
1
0
0
1
0
1
1
1
0
1
0
0
1
1
1
0
0
0
0
1
0
1
1
0
0
0
1
1
1
0
0
0
1
1
0
0
0
0
1
1
0
0
1
0
1
1
0
4
2
0
1


1
1
0
0
1
1
0
0
0
0
0
1
2
0
0
1
2
1
0
0
1
0
0
0
2
3
0
1
1
0
0
0
0
0
1
0
0
0
1
2
1
1
1
0
0
0
4
0
1
1
1
1
1
1
0
1
0
2
0
0
2
0
2
0
0
0
1
1
1
1
1
1
0
1
1
1
1
0
1
0
0
1
0
1
1
1
1
0
0
0
0
1
1
1
0
0
1
0
0
0
0
2
0
2
1
1
1
1
0
0
1
0
1
2
0
1
0
1
0
1
1
1
0
0
0
1
0
1
1
0
0
0
0
0
1
1
0
0
0
0
0
0
0
1
0
2
0
0
0
2
0
0
1
1
0
1
2
0
0
1
1
3
4
2
1
1
0
1
1
1
0
1
1
1
2
0
1
0
0
0
2
1
0
1
1
0
1
2
2
1
1
0
0
0
1
0
0
1
0
1
0
2
2
2
1
0
0
1
0
0
1
1
1
1
0
0
0
1
1
1
1
1
1
1
0
0
0
1
1
1
0
1
0
2
0
1
0
0
2
1
0
0
1
0
0
0
1
1
1
1
1
1
1
2
1
1
2
0
0
0
0
0
0
0
0
1
0
0
0
2
0
1
1
2
0
0
0
0
2
1
1
1
1
1
1
1
1
0
0
1
1
1
0
1
1
0
0
1
1
0
0
1
1
2
0
1
1
1
2
1
0
1
1
0
0
0
0
1
0
1
0
0
1
0
1
1
1
0
0
0
0
0
0
1
1
2
0
0
1
0
0
1
1
1
0
0
0
0
2
1
0
2
0
0
0
0
0
1
0
1
1
0
1
0
0
0
1
2
1
1
0
1
1
0
1
0
0
2
2
0
1
0
1
0
0
1
0
0
2
1
1
0
0
0
1
0
0
1
2
0
1
1
1
1
1
1
1
1
1
1
1
1
1
0
0
1
0
1
0
1
1
1
0
1
1
0
0
1
0
1
0
1
1
0
1
0
1
2
1
0
1
1
1
0
0
1
0
2
1
0
0
1
1
0
0
0
0
1
1
1
1
0
0
0
0
1
1
1
0
1
1
0
0
1
1
0
1
0
0
0
0
0
0
0
1
1
3
1
0
0
1
0
0
1
2
0
1
0
0
1


1
1
1
0
2
2
1
1
1
0
1
0
1
2
0
0
1
0
1
0
1
0
0
0
1
0
2
2
2
1
1
0
0
1
0
1
1
0
0
1
0
2
1
1
1
1
0
0
1
0
2
0
2
0
1
0
1
1
0
0
1
0
2
0
1
0
2
0
0
0
1
1
2
1
0
0
1
1
0
2
1
1
1
1
1
0
1
2
3
0
0
0
1
1
0
1
0
1
0
2
2
1
1
0
0
1
1
1
0
0
1
1
2
2
0
0
2
1
1
0
0
0
1
1
0
3
2
0
1
3
1
1
1
1
1
2
2
1
0
1
0
2
0
2
2
1
0
0
1
1
1
2
0
1
0
0
2
0
1
0
2
0
1
2
1
1
0
0
2
1
2
0
1
2
0
0
1
1
0
0
2
0
1
1
0
1
0
0
2
2
0
0
1
1
1
1
0
1
0
2
0
0
1
0
0
0
0
1
1
0
1
1
0
1
3
2
1
1
1
2
0
0
2
2
1
0
0
2
0
0
0
2
1
0
0
1
2
1
0
0
0
0
1
0
0
0
0
0
0
1
1
1
1
1
0
0
2
0
1
3
0
0
0
0
1
1
0
2
0
0
1
1
0
0
1
0
0
0
0
2
1
0
1
0
1
0
1
1
1
0
0
0
0
0
0
0
0
0
1
1
1
0
0
1
2
2
1
1
0
0
1
1
0
3
0
0
0
1
2
1
0
0
0
0
0
1
2
1
0
0
2
0
1
0
1
1
0
1
1
0
2
1
2
3
0
1
0
0
1
1
1
0
0
0
1
0
0
0
0
2
1
0
0
1
0
0
1
1
1
1
2
0
0
0
1
1
0
0
1
2
2
1
2
2
0
3
2
2
0
0
0
0
1
1
1
0
0
0
0
1
0
2
1
0
0
1
1
1
1
0
0
0
0
1
1
0
0
0
2
0
1
0
0
0
0
1
0
2
2
0
2
2
2
0
1
0
1
0
0
0
0
0
1
1
0
1
0
0
1
0
2
2
1
0
1
1
2
2
0
1
0
0
0
1
0
0
1
1
2
0
1
1
0
2
0
2
2
0
1
0
0
1
0
0
1
2
0
3
1
2
0
1
0
1
2
2
2
2
0
1


1
0
0
0
0
2
0
1
0
0
0
0
1
2
1
0
2
0
1
0
0
1
0
1
0
1
0
0
1
2
2
1
0
0
1
1
1
0
0
1
1
0
2
0
0
1
0
2
2
2
1
0
1
0
1
1
2
1
1
0
1
0
0
1
2
0
0
1
0
0
0
3
0
0
1
1
1
0
0
1
0
1
1
0
0
0
2
1
1
0
1
1
2
1
0
0
2
0
1
0
1
0
0
1
0
0
0
0
2
0
1
1
0
1
0
2
0
3
0
1
0
2
1
1
0
0
1
0
0
0
0
0
0
1
2
0
2
0
0
1
0
1
0
1
2
1
0
0
0
1
0
0
0
0
1
1
0
0
2
0
2
0
0
0
1
2
0
0
0
1
1
0
0
0
0
0
0
0
1
1
0
1
0
0
0
0
0
0
0
0
0
1
0
1
0
1
1
0
0
0
1
1
0
0
1
0
1
1
0
2
1
0
1
0
1
0
0
0
1
1
2
0
0
2
0
0
1
0
1
0
0
0
1
2
1
0
0
1
1
2
0
0
1
0
0
2
0
1
0
0
0
1
0
0
1
1
1
0
0
0
2
0
1
0
3
0
1
0
0
0
0
0
2
0
0
0
0
0
3
0
1
0
0
1
0
1
0
0
0
0
2
1
1
2
0
2
0
0
1
0
2
1
1
1
0
0
0
1
0
0
1
0
1
1
1
2
2
2
2
0
0
1
1
1
0
0
1
2
1
0
1
1
0
1
1
1
0
1
0
2
0
1
0
1
0
0
0
0
2
2
1
2
1
2
0
0
0
1
0
1
0
0
1
1
0
0
0
2
1
1
1
0
2
1
0
0
0
0
0
1
0
0
2
0
1
0
1
0
2
0
0
1
0
2
0
0
0
0
0
1
0
0
0
0
0
0
0
1
1
0
1
0
1
0
0
0
0
0
1
0
1
0
2
0
2
1
1
1
2
0
1
0
1
1
1
1
2
0
0
0
1
1
2
1
0
0
1
0
1
0
1
0
1
0
1
2
1
0
0
0
0
2
1
1
1
2
2
0
1
1
1
0
1
0
1
1
0
0
0
2
1
0
1
2
0
0
1
0
1
0
1
1
1
0
0
0
0
3
0
0


2
0
0
0
1
0
2
0
1
1
2
0
1
1
1
2
0
3
1
3
1
1
1
2
1
0
2
1
2
0
1
0
0
2
2
1
1
0
2
0
0
1
3
1
1
2
0
1
0
1
1
3
2
1
0
0
0
0
2
1
0
0
1
1
1
1
2
0
1
2
2
2
0
1
2
1
0
0
0
0
2
1
1
0
1
0
1
1
1
1
0
1
1
0
0
4
1
0
0
0
2
1
1
1
0
1
3
2
1
0
0
0
1
2
0
1
2
1
2
0
0
4
1
1
1
0
1
0
1
1
1
1
0
0
1
1
1
1
1
0
2
0
1
1
0
0
1
0
0
1
1
1
1
0
1
1
2
1
1
0
0
0
2
1
2
1
1
1
1
0
1
0
0
0
0
0
2
0
2
1
0
1
0
2
3
1
0
1
2
2
3
2
0
0
0
2
0
1
2
0
0
2
0
0
0
0
1
1
2
1
1
0
1
2
0
3
1
1
1
1
1
0
0
1
1
0
1
0
0
1
1
2
0
2
0
1
1
0
0
1
1
1
1
1
1
1
0
0
1
0
1
0
2
0
0
0
0
0
3
0
3
2
1
2
0
0
1
3
2
1
2
1
1
0
3
1
0
2
2
1
0
0
2
1
0
2
2
0
0
0
2
1
1
0
3
0
2
0
1
1
1
2
1
1
0
0
1
0
0
0
2
2
0
2
1
1
1
1
1
1
1
0
1
2
1
0
0
1
2
1
2
5
0
0
1
1
1
1
2
0
0
2
2
1
1
1
0
1
1
0
0
1
0
1
1
2
2
0
1
1
1
1
1
1
1
0
1
0
0
1
1
2
2
1
2
0
0
2
0
3
3
1
0
2
1
1
1
1
1
0
1
1
1
1
1
0
1
1
3
0
1
2
1
1
1
1
0
0
3
1
2
3
1
1
1
2
1
0
0
0
1
1
0
1
2
0
1
1
0
0
2
0
1
1
0
0
0
0
1
1
1
1
1
2
1
0
1
2
1
1
1
1
0
1
0
0
2
0
0
1
1
2
0
1
2
1
1
0
1
0
0
1
0
2
2
3
0
1
1
3
0
1
2
1
2
1
0
1
1
1
0
0
0
0
0
0
1
2
1
0


0
2
1
1
1
1
1
1
1
1
1
1
0
1
1
0
2
2
2
1
1
3
2
1
1
0
1
0
1
1
1
1
1
1
1
1
0
3
2
1
1
1
1
1
1
3
1
2
0
1
1
0
2
2
1
2
0
1
1
2
1
3
4
1
1
1
3
2
0
1
2
2
2
0
1
1
1
3
0
1
1
0
0
1
2
1
2
0
1
1
1
2
0
0
1
1
1
2
4
1
1
2
1
1
1
2
2
2
1
1
1
1
1
1
1
1
1
1
2
2
0
1
1
1
2
0
1
1
2
0
0
1
2
1
0
0
1
0
2
1
1
1
2
1
0
2
1
2
2
1
0
1
3
1
0
2
2
0
1
2
2
2
1
0
1
1
2
0
1
1
1
1
1
1
0
1
1
0
1
1
0
0
1
3
0
0
0
2
1
1
1
0
1
0
2
1
0
2
0
1
1
0
1
1
1
1
2
1
0
0
2
1
1
0
2
1
0
1
1
1
1
1
1
2
1
2
4
1
2
1
1
1
0
1
0
0
1
2
0
1
1
1
0
1
1
1
1
1
1
0
1
2
1
0
1
1
1
0
1
1
1
1
1
1
0
0
1
0
1
0
0
2
1
1
1
1
1
2
1
2
0
2
2
0
1
1
1
2
0
1
3
2
2
1
1
2
0
1
0
0
0
2
2
0
0
1
0
0
0
0
2
0
2
1
0
1
0
0
0
1
0
1
1
1
2
1
0
0
0
1
1
1
0
1
0
1
2
0
0
1
1
2
2
0
2
0
2
0
1
2
2
1
0
0
0
1
0
0
0
0
1
0
1
0
0
0
1
1
1
1
0
0
0
1
1
1
1
0
0
0
0
0
0
1
3
1
1
1
0
0
1
0
1
0
1
1
1
2
1
0
0
1
0
1
3
0
0
1
0
0
1
0
1
0
0
0
0
0
1
1
0
2
0
1
0
1
0
2
0
1
0
0
2
0
2
0
0
0
0
0
0
0
0
0
1
0
0
0
1
1
1
1
2
1
0
2
0
3
3
2
1
2
2
1
0
1
1
1
2
1
1
1
1
0
0
0
0
1
0
0
0
0
1
1
2
1
1
0
1
0
1
0
0
2
0
0
0
0
0
0


0
0
0
0
0
2
2
0
0
0
1
1
0
0
0
0
0
1
0
0
0
1
0
0
0
0
2
0
0
0
0
0
0
2
1
1
1
1
1
1
0
1
1
0
1
0
0
0
0
0
0
0
0
0
0
0
1
1
1
0
0
1
1
1
0
0
0
0
1
0
2
0
0
0
1
0
0
0
0
1
0
0
0
0
0
1
0
0
1
2
0
0
1
0
2
0
0
0
2
0
0
0
1
1
1
1
1
1
1
1
1
0
0
0
0
2
0
4
0
0
0
1
1
0
0
0
0
0
1
0
0
0
0
0
0
1
0
0
1
0
3
1
1
0
1
1
0
0
0
1
0
3
1
1
1
0
0
1
1
1
0
1
1
0
1
0
0
1
1
2
0
0
1
1
1
0
3
0
3
1
1
0
1
0
0
2
0
0
1
1
0
0
0
1
0
1
0
1
0
1
1
1
2
1
1
0
0
0
0
0
0
0
1
0
0
1
0
0
0
1
0
1
0
0
2
0
1
2
1
0
2
1
1
0
1
2
0
0
0
1
2
0
0
0
0
1
1
1
0
1
0
0
0
1
1
1
1
0
1
0
0
0
0
0
1
1
1
1
0
3
0
0
0
0
1
1
0
1
1
0
0
1
0
1
0
0
1
0
1
0
1
0
1
1
1
3
1
1
0
0
0
0
0
1
0
1
0
2
0
3
1
1
0
1
1
0
1
2
3
0
0
3
1
0
0
0
0
0
0
0
0
1
0
1
0
2
0
1
0
3
0
1
0
0
1
1
0
1
1
0
2
1
1
0
2
0
0
0
2
0
0
1
1
0
0
0
0
0
0
0
1
0
0
0
0
0
0
2
0
1
1
1
0
1
1
1
0
2
1
0
2
2
1
0
0
0
0
1
0
1
1
1
1
1
1
1
0
0
1
0
1
2
1
0
0
0
0
0
1
1
0
0
1
0
0
0
0
1
0
1
0
0
0
0
1
0
0
0
1
1
1
1
0
0
1
1
0
3
0
0
0
0
0
1
0
0
0
0
1
0
0
1
1
0
0
0
1
1
0
0
0
0
0
0
0
0
0
0
0
1
4
1
1
2
0
1
0
0
1
1
1
0
0
1
1
1
1
0
0
0


0
1
1
1
1
0
0
0
1
0
0
0
1
0
3
1
1
1
1
1
1
1
1
0
1
0
1
1
1
1
0
0
2
0
0
0
0
1
0
0
1
0
1
1
2
1
2
2
1
1
1
1
1
2
0
0
0
1
1
0
0
0
1
1
2
1
1
3
0
1
0
2
1
1
1
0
0
0
1
1
1
0
2
3
1
1
0
1
0
1
1
0
0
0
1
1
1
0
2
1
1
1
1
0
1
0
0
1
0
0
1
1
0
1
0
0
0
0
0
0
1
0
0
1
1
1
1
0
1
0
1
2
1
1
0
0
0
0
0
0
0
0
1
0
1
0
1
1
2
2
1
1
1
0
1
0
1
1
1
0
0
0
1
1
1
0
0
0
0
0
0
1
1
1
0
0
0
0
1
2
0
0
1
2
0
1
2
0
1
1
1
1
0
0
1
1
1
1
1
0
0
1
1
1
1
1
2
1
1
1
2
0
0
1
0
0
0
1
0
1
0
1
0
1
0
0
1
1
0
1
1
0
0
0
1
0
1
0
0
0
0
2
1
2
3
1
0
0
1
0
0
0
0
0
0
1
1
1
0
2
2
0
0
0
0
0
0
1
0
0
0
3
0
0
2
0
0
0
0
1
0
2
0
0
0
0
0
0
1
1
1
0
2
2
0
1
0
1
1
2
1
0
1
1
1
0
0
1
1
1
1
0
0
0
0
1
1
1
0
1
0
0
0
0
0
1
1
1
0
0
0
1
1
0
0
1
2
0
3
0
2
0
0
2
2
1
1
1
0
0
0
1
0
0
1
1
1
1
0
1
0
0
0
1
1
0
0
0
0
1
1
2
1
0
1
0
1
1
1
1
1
0
0
0
0
1
1
1
0
0
1
0
0
0
0
1
0
1
0
0
1
2
0
0
0
1
0
0
0
0
1
0
0
0
3
1
1
1
4
1
0
0
0
0
2
2
0
0
0
0
0
1
1
0
2
0
1
1
0
0
1
0
1
0
1
1
1
0
1
0
1
2
1
1
0
0
1
1
0
0
1
0
1
1
1
0
4
2
0
0
0
1
0
1
1
1
0
0
0
0
0
0
0
0
1
1
0
1
0
0
0
1
0
0
1
0
2
0
0
0


3
0
0
1
0
0
0
1
0
0
1
0
2
0
0
2
1
1
0
0
1
0
1
1
0
1
0
1
0
0
0
0
1
0
1
1
1
0
0
0
0
1
1
0
1
1
1
1
1
0
0
1
1
0
0
1
0
0
0
0
0
3
1
0
1
0
0
0
0
0
1
1
1
0
0
0
0
0
0
0
0
0
0
1
1
2
1
1
0
0
0
2
0
1
0
1
3
1
0
0
2
1
0
0
0
0
0
0
0
0
0
0
1
1
0
0
1
1
0
0
0
0
0
1
0
0
2
1
1
0
1
1
0
1
0
1
1
0
1
0
1
2
1
1
1
1
0
1
1
0
0
0
1
1
2
0
1
0
0
0
0
0
0
1
1
1
0
0
0
0
0
0
1
0
1
0
0
1
0
4
1
0
0
0
0
0
0
0
0
0
0
0
2
0
0
0
1
0
0
0
1
1
1
1
1
1
1
1
0
3
1
0
0
0
0
1
0
0
0
0
0
0
1
1
0
1
1
1
0
1
0
0
0
0
0
0
0
0
1
1
0
3
0
1
1
1
1
2
1
1
0
1
0
0
0
1
0
1
0
0
0
0
0
0
0
0
1
0
1
1
1
0
0
0
0
0
1
1
0
1
0
1
0
2
3
1
0
3
1
0
0
0
0
1
0
0
0
0
1
1
0
1
1
0
0
0
0
0
0
0
0
0
0
1
1
1
1
2
4
1
1
1
0
1
0
1
1
3
1
1
0
0
1
1
1
0
1
0
0
1
0
0
0
2
1
0
1
1
1
0
0
0
0
0
0
1
1
1
2
2
0
0
0
0
0
0
1
1
1
0
0
1
1
0
1
0
2
0
0
1
0
0
0
0
0
1
1
1
0
1
0
1
2
1
1
0
1
1
1
1
0
0
0
0
0
0
1
1
1
0
1
0
0
1
0
1
1
1
0
0
0
2
0
1
0
0
0
1
1
0
0
0
0
1
1
0
0
0
0
1
1
0
0
1
2
0
0
1
0
1
0
1
1
1
1
1
0
0
0
1
0
0
0
2
0
0
0
0
2
0
1
1
2
0
0
1
1
1
0
1
2
0
0
0
0
1
1
0
0
0
0
0
0
1
1
0
0
0
0
0


0
0
0
0
1
0
0
0
0
1
0
1
0
0
0
0
0
0
0
0
1
1
1
0
0
1
0
0
1
0
1
1
0
0
0
2
1
1
1
1
0
0
0
1
0
0
0
0
1
0
0
1
0
0
1
0
1
2
1
0
0
0
3
1
1
0
0
3
0
0
0
0
0
0
0
0
0
0
0
1
1
0
0
0
1
1
0
0
2
1
1
1
0
0
0
0
0
0
0
2
0
0
1
1
1
1
0
0
1
1
2
2
1
0
0
1
0
2
1
0
1
0
1
2
3
1
1
2
2
0
0
1
2
1
1
0
1
0
0
0
0
1
0
1
0
1
0
0
1
1
1
2
2
0
1
1
1
1
0
2
0
1
0
0
0
0
0
0
0
0
0
1
0
0
1
0
1
1
0
1
0
0
1
1
0
0
1
0
1
1
0
1
0
0
1
0
1
1
1
1
0
0
1
0
1
1
1
0
1
2
0
2
1
2
3
0
0
0
0
1
0
2
0
2
0
0
1
0
0
0
1
1
1
0
0
1
0
0
0
1
0
0
0
1
0
0
0
0
1
1
0
0
0
0
1
1
0
0
1
1
0
0
1
2
1
1
0
2
1
2
1
1
1
2
1
0
1
1
0
1
0
2
0
1
0
0
0
0
0
2
0
0
0
0
1
3
1
1
1
1
1
1
1
0
0
0
0
0
0
0
0
0
0
0
3
1
1
0
0
0
0
1
0
0
0
1
1
1
0
0
0
0
0
0
0
1
0
0
1
0
0
0
1
0
0
0
0
0
0
0
2
0
0
0
0
0
0
0
0
0
3
1
0
0
1
1
0
0
1
0
1
0
0
1
0
1
1
0
0
0
0
0
0
0
1
0
0
0
1
2
0
1
2
1
0
1
0
0
0
0
0
0
2
0
0
0
0
0
0
0
1
1
2
0
0
0
0
1
1
0
1
4
0
1
0
1
1
1
0
1
0
1
0
0
1
1
0
2
0
0
1
0
1
0
0
2
2
1
1
0
1
1
0
1
0
1
1
1
0
0
0
1
2
0
0
1
1
1
0
0
0
0
0
1
2
1
0
0
0
2
0
1
0
1
0
1
1
1
0
0
0
1
1
1
1
0
0
0
0
1


In [92]:
y2.head()

Unnamed: 0,index,AssignmentId,WorkerId,Input.VIDEO_ID,Input.CLIP,Answer.anger,Answer.disgust,Answer.fear,Answer.happiness,Answer.sadness,Answer.surprise,Segment_ID,ann_emotions
0,0,37TRT2X24R3LC9QGRNHFRTZXMEBBJA,A2WNW8A4MOR7T7,xSCvspXYU9k,5,0.0,1.0,0.0,0.0,0.0,0.0,xSCvspXYU9k5,1
1,1,3DL65MZB8ERWBYWURM2J5XSNZRKCE2,A3CAF7LTD3ORSG,xSCvspXYU9k,5,0.0,0.0,0.0,0.0,1.0,1.0,xSCvspXYU9k5,2
2,2,3ZOTGHDK5JNYU6YFIMVFPEQJPVKSOM,A2R0YYUAWNT7UD,xSCvspXYU9k,5,0.0,0.0,0.0,0.0,0.0,0.0,xSCvspXYU9k5,0
3,3,33IZTU6J82DLOFC7JDIB8RHC1Y7SX7,A2R0YYUAWNT7UD,_UNQDdiAbWI,0,0.0,0.0,0.0,0.0,0.0,0.0,_UNQDdiAbWI0,0
4,4,3ATPCQ38J9M6OOGN79IM65UFHYTAYY,A1AZAC9CPBEP6K,_UNQDdiAbWI,0,0.0,0.0,0.0,1.0,0.0,0.0,_UNQDdiAbWI0,1


In [103]:
y2['ann_emotions'].value_counts()

0    33660
1    29810
2     6017
3     1023
4      200
5       17
6        2
Name: ann_emotions, dtype: int64

In [108]:
y2[y2['ann_emotions']==6].head(10)

Unnamed: 0,index,AssignmentId,WorkerId,Input.VIDEO_ID,Input.CLIP,Answer.anger,Answer.disgust,Answer.fear,Answer.happiness,Answer.sadness,Answer.surprise,Segment_ID,ann_emotions
2904,2905,3P1L2B7AD21BQT3PYDYEDSCOX1BOL3,A2TBXASXZIRNNW,npIVLL_fTf0,10,3.0,2.0,2.0,1.0,2.0,1.0,npIVLL_fTf010,6
53553,2462,3XC1O3LBOTYR3L8XUUKD72IBYHKTL4,A2EOOF9D135HQ1,sqa_mosi/znNt--6itO4,2,1.0,1.0,1.0,1.0,1.0,1.0,sqa_mosi/znNt--6itO42,6


## Method 1: compute mean of the 3 annotations

In [158]:
y2_mean = y2.groupby('Segment_ID').mean()

In [159]:
y2_mean.reset_index(inplace=True)

In [160]:
y2_mean.drop(columns=['index'])

Unnamed: 0,Segment_ID,Answer.anger,Answer.disgust,Answer.fear,Answer.happiness,Answer.sadness,Answer.surprise
0,--qXJuDtHPw5,0.0,0.0,0.000000,0.666667,0.000000,0.0
1,-3g5yACwYnA10,0.0,0.0,0.666667,0.666667,0.666667,0.0
2,-3g5yACwYnA13,0.0,0.0,0.000000,0.000000,0.000000,0.0
3,-3g5yACwYnA2,0.0,0.0,0.333333,0.666667,0.666667,0.0
4,-3g5yACwYnA3,0.0,0.0,0.000000,0.333333,0.333333,0.0
...,...,...,...,...,...,...,...
23513,zwTrXwi54us6,0.0,0.0,0.000000,0.000000,0.000000,0.0
23514,zwTrXwi54us7,0.0,0.0,0.000000,0.000000,0.000000,0.0
23515,zwTrXwi54us8,0.0,0.0,0.000000,1.000000,0.000000,0.0
23516,zwTrXwi54us9,0.0,0.0,0.000000,0.333333,0.000000,0.0


In [161]:
# Creation of ann_emotions column to know how many emotions have been labeled for each row (= sequence)
y2_mean["ann_emotions"] = ""
for index, row in y2_mean.iterrows():
    compt = 0
    for i in emotions:
        if row[i] > 0:
            compt+=1
    y2_mean['ann_emotions'].iloc[index] = compt

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)


In [162]:
y2_mean['ann_emotions'].value_counts()

1    11332
2     5656
0     3736
3     2134
4      568
5       84
6        8
Name: ann_emotions, dtype: int64

In [163]:
y2_mean['max_row'] = ''
for index, row in y2_mean.iterrows():
    liste = []
    for i in emotions:
        liste.append(row[i])
    maximum = max(liste)
    y2_mean['max_row'].iloc[index] = maximum

In [164]:
y2_mean

Unnamed: 0,Segment_ID,index,Answer.anger,Answer.disgust,Answer.fear,Answer.happiness,Answer.sadness,Answer.surprise,ann_emotions,max_row
0,--qXJuDtHPw5,2089.0,0.0,0.0,0.000000,0.666667,0.000000,0.0,1,0.666667
1,-3g5yACwYnA10,505.0,0.0,0.0,0.666667,0.666667,0.666667,0.0,3,0.666667
2,-3g5yACwYnA13,2311.0,0.0,0.0,0.000000,0.000000,0.000000,0.0,0,0.000000
3,-3g5yACwYnA2,1894.0,0.0,0.0,0.333333,0.666667,0.666667,0.0,3,0.666667
4,-3g5yACwYnA3,8986.0,0.0,0.0,0.000000,0.333333,0.333333,0.0,2,0.333333
...,...,...,...,...,...,...,...,...,...,...
23513,zwTrXwi54us6,10771.0,0.0,0.0,0.000000,0.000000,0.000000,0.0,0,0.000000
23514,zwTrXwi54us7,10774.0,0.0,0.0,0.000000,0.000000,0.000000,0.0,0,0.000000
23515,zwTrXwi54us8,10777.0,0.0,0.0,0.000000,1.000000,0.000000,0.0,1,1.000000
23516,zwTrXwi54us9,10780.0,0.0,0.0,0.000000,0.333333,0.000000,0.0,1,0.333333


In [165]:
y2_mean['nb_max'] = ''
for index, row in y2_mean.iterrows():
    compt = 0
    if row['max_row'] != 0:
        for i in emotions:
            if row[i] == row['max_row']:
                compt+=1
    y2_mean['nb_max'].iloc[index] = compt

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)


In [166]:
y2_mean.head(2)

Unnamed: 0,Segment_ID,index,Answer.anger,Answer.disgust,Answer.fear,Answer.happiness,Answer.sadness,Answer.surprise,ann_emotions,max_row,nb_max
0,--qXJuDtHPw5,2089.0,0.0,0.0,0.0,0.666667,0.0,0.0,1,0.666667,1
1,-3g5yACwYnA10,505.0,0.0,0.0,0.666667,0.666667,0.666667,0.0,3,0.666667,3


In [167]:
y2_mean['nb_max'].value_counts()

1    16911
0     3736
2     2399
3      403
4       59
5       10
Name: nb_max, dtype: int64

### Binary method for classification with neutral column creation

In [153]:
# Looking at the data, we see that 3_000 out of 23_000 rows have at least two emotions given the same importance.
# As we want to have one emotion rising, we chose to drop them.

In [228]:
y3 = y2_mean[y2_mean['nb_max'] <= 1]

In [229]:
y3.reset_index(inplace=True)

In [230]:
y3['nb_max'].value_counts()

1    16911
0     3736
Name: nb_max, dtype: int64

In [231]:
y3.head(2)

Unnamed: 0,level_0,Segment_ID,index,Answer.anger,Answer.disgust,Answer.fear,Answer.happiness,Answer.sadness,Answer.surprise,ann_emotions,max_row,nb_max
0,0,--qXJuDtHPw5,2089.0,0.0,0.0,0.0,0.666667,0.0,0.0,1,0.666667,1
1,2,-3g5yACwYnA13,2311.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0


In [232]:
# If emotion is max then 1, else 0
for index, row in y3.iterrows():
    if row['max_row'] != 0:
        for i in emotions:
            if row[i] == row['max_row']:
                y3.loc[index, i] = 1
            else: 
                y3.loc[index, i] = 0

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)


In [233]:
y3

Unnamed: 0,level_0,Segment_ID,index,Answer.anger,Answer.disgust,Answer.fear,Answer.happiness,Answer.sadness,Answer.surprise,ann_emotions,max_row,nb_max
0,0,--qXJuDtHPw5,2089.0,0.0,0.0,0.0,1.0,0.0,0.0,1,0.666667,1
1,2,-3g5yACwYnA13,2311.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.000000,0
2,5,-3g5yACwYnA4,14914.0,0.0,0.0,0.0,1.0,0.0,0.0,1,0.666667,1
3,6,-3g5yACwYnA9,7084.0,0.0,0.0,0.0,0.0,1.0,0.0,2,0.666667,1
4,7,-3nNcZdcdvU0,11776.0,0.0,0.0,0.0,1.0,0.0,0.0,1,1.666667,1
...,...,...,...,...,...,...,...,...,...,...,...,...
20642,23513,zwTrXwi54us6,10771.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.000000,0
20643,23514,zwTrXwi54us7,10774.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.000000,0
20644,23515,zwTrXwi54us8,10777.0,0.0,0.0,0.0,1.0,0.0,0.0,1,1.000000,1
20645,23516,zwTrXwi54us9,10780.0,0.0,0.0,0.0,1.0,0.0,0.0,1,0.333333,1


In [234]:
# Creation of neutral column if no emotion has been detected
y3['Answer.neutral'] = ''
y3['Answer.neutral'] = y3['ann_emotions'].apply(lambda x: 1 if x==0 else 0)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  y3['Answer.neutral'] = ''
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  y3['Answer.neutral'] = y3['ann_emotions'].apply(lambda x: 1 if x==0 else 0)


In [238]:
y3.drop(columns=['level_0', 'index'])

Unnamed: 0,Segment_ID,Answer.anger,Answer.disgust,Answer.fear,Answer.happiness,Answer.sadness,Answer.surprise,ann_emotions,max_row,nb_max,Answer.neutral
0,--qXJuDtHPw5,0.0,0.0,0.0,1.0,0.0,0.0,1,0.666667,1,0
1,-3g5yACwYnA13,0.0,0.0,0.0,0.0,0.0,0.0,0,0.000000,0,1
2,-3g5yACwYnA4,0.0,0.0,0.0,1.0,0.0,0.0,1,0.666667,1,0
3,-3g5yACwYnA9,0.0,0.0,0.0,0.0,1.0,0.0,2,0.666667,1,0
4,-3nNcZdcdvU0,0.0,0.0,0.0,1.0,0.0,0.0,1,1.666667,1,0
...,...,...,...,...,...,...,...,...,...,...,...
20642,zwTrXwi54us6,0.0,0.0,0.0,0.0,0.0,0.0,0,0.000000,0,1
20643,zwTrXwi54us7,0.0,0.0,0.0,0.0,0.0,0.0,0,0.000000,0,1
20644,zwTrXwi54us8,0.0,0.0,0.0,1.0,0.0,0.0,1,1.000000,1,0
20645,zwTrXwi54us9,0.0,0.0,0.0,1.0,0.0,0.0,1,0.333333,1,0


In [None]:
# Creation of y3_final taking correct Segment_ID, sorting, and only taking emotions

# TO BE DONE

### Keeping the mean 

In [241]:
y4 = y2.groupby('Segment_ID').mean()
y4.reset_index(inplace=True)

In [244]:
y4.head()

Unnamed: 0,Segment_ID,index,Answer.anger,Answer.disgust,Answer.fear,Answer.happiness,Answer.sadness,Answer.surprise
0,--qXJuDtHPw5,2089.0,0.0,0.0,0.0,0.666667,0.0,0.0
1,-3g5yACwYnA10,505.0,0.0,0.0,0.666667,0.666667,0.666667,0.0
2,-3g5yACwYnA13,2311.0,0.0,0.0,0.0,0.0,0.0,0.0
3,-3g5yACwYnA2,1894.0,0.0,0.0,0.333333,0.666667,0.666667,0.0
4,-3g5yACwYnA3,8986.0,0.0,0.0,0.0,0.333333,0.333333,0.0


In [247]:
# Exploring columns that both have happiness and another 
y4['dupl_sad_happy'] = ""
for index, row in y4.iterrows():
    if row['Answer.happiness'] == 0:
        y4['dupl_sad_happy'].iloc[index] = 0
    else:
        if row['Answer.fear'] > 0 or row['Answer.sadness'] > 0:
            y4['dupl_sad_happy'].iloc[index] = 1
        else:
            y4['dupl_sad_happy'].iloc[index] = 0

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)


In [248]:
y4.head()

Unnamed: 0,Segment_ID,index,Answer.anger,Answer.disgust,Answer.fear,Answer.happiness,Answer.sadness,Answer.surprise,dupl_sad_happy
0,--qXJuDtHPw5,2089.0,0.0,0.0,0.0,0.666667,0.0,0.0,0
1,-3g5yACwYnA10,505.0,0.0,0.0,0.666667,0.666667,0.666667,0.0,1
2,-3g5yACwYnA13,2311.0,0.0,0.0,0.0,0.0,0.0,0.0,0
3,-3g5yACwYnA2,1894.0,0.0,0.0,0.333333,0.666667,0.666667,0.0,1
4,-3g5yACwYnA3,8986.0,0.0,0.0,0.0,0.333333,0.333333,0.0,1


In [249]:
y4['dupl_sad_happy'].value_counts()

0    21195
1     2323
Name: dupl_sad_happy, dtype: int64

In [250]:
y4[y4['dupl_sad_happy'] ==1]

Unnamed: 0,Segment_ID,index,Answer.anger,Answer.disgust,Answer.fear,Answer.happiness,Answer.sadness,Answer.surprise,dupl_sad_happy
1,-3g5yACwYnA10,505.0,0.000000,0.0,0.666667,0.666667,0.666667,0.0,1
3,-3g5yACwYnA2,1894.0,0.000000,0.0,0.333333,0.666667,0.666667,0.0,1
4,-3g5yACwYnA3,8986.0,0.000000,0.0,0.000000,0.333333,0.333333,0.0,1
11,-571d8cVauQ3,2098.0,0.000000,0.0,0.000000,0.333333,0.333333,0.0,1
21,-Alixo7euuU17,12388.0,0.000000,0.0,0.000000,0.333333,0.333333,0.0,1
...,...,...,...,...,...,...,...,...,...
23473,zk2jTlAtvSU3,1642.0,0.000000,0.0,0.333333,0.333333,0.000000,0.0,1
23475,zknhrUs7-lI1,3034.0,0.000000,0.0,0.333333,0.333333,0.666667,0.0,1
23481,zqkawTdHN5s1,2911.0,0.000000,0.0,0.333333,1.666667,0.000000,0.0,1
23488,zrFZAofNGi45,490.0,0.666667,0.0,0.666667,0.333333,1.666667,0.0,1
