# Data Analysis
#### John R. Starr; jrs294@pitt.edu
Time for some analysis! Let's load in the usual modules and then begin our analysis.

In [1]:
import nltk
import numpy as np
import pandas as pd

In [2]:
# Load the pickled DataFrame from 'ordered_only_df.pkl' (the path is relative
# to the notebook's working directory).
# NOTE(review): unpickling can execute arbitrary code, so this file should
# only be loaded if it comes from a trusted source -- presumably it was
# written by an earlier step of this project; confirm.
df = pd.read_pickle('ordered_only_df.pkl')

A breakdown of what we've got:

In [3]:
# Preview the first five rows to see which columns are available.
df.head(n=5)

Unnamed: 0,ID,Eng,Far,Eng_Tok,Far_Tok,Eng_Len,Far_Len,Eng_Types,Far_Types,Far_POS,Far_Chunks,Eng_POS,Eng_Chunks,Word_Order
4,5,stop please stop,دست نگه داريد خواهش ميکنم دست نگه داريد,"[stop, please, stop]","[دست, نگه, داريد, خواهش, ميکنم, دست, نگه, داريد]",3,8,"{please, stop}","{نگه, دست, ميکنم, خواهش, داريد}","[(دست, N), (نگه, N), (داريد, V), (خواهش, Ne), ...",[دست NP] [نگه داريد VP] [خواهش ميکنم دست NP] [...,"[(stop, JJ), (please, NN), (stop, VB)]","[[(stop, JJ), (please, NN)], [[('stop', 'VB')]]]",SVO
8,9,god damn it put that down,لعنت به تو اونو بذار زمين,"[god, damn, it, put, that, down]","[لعنت, به, تو, اونو, بذار, زمين]",6,6,"{damn, down, put, that, god, it}","{زمين, بذار, لعنت, اونو, تو, به}","[(لعنت, N), (به, P), (تو, PRO), (اونو, PRO), (...",[لعنت NP] [به PP] [تو NP] [اونو NP] [بذار VP] ...,"[(god, NN), (damn, VBZ), (it, PRP), (put, VBD)...","[[(god, NN)], [[('damn', 'VBZ')]], (it, PRP), ...",SOV
10,11,its the last feed weve got,اين آخرين علوفه اي بود که ما داشتيم,"[its, the, last, feed, weve, got]","[اين, آخرين, علوفه, اي, بود, که, ما, داشتيم]",6,8,"{the, its, got, feed, weve, last}","{علوفه, آخرين, بود, داشتيم, ما, اين, اي, که}","[(اين, Ne), (آخرين, NUM), (علوفه, N), (اي, N),...",[اين آخرين علوفه NP] [اي NP] [بود VP] که [ما N...,"[(its, PRP$), (the, DT), (last, JJ), (feed, NN...","[(its, PRP$), [(the, DT), (last, JJ), (feed, N...",SOV
14,15,you lied to me dan,تو به من دروغ گفتي دن,"[you, lied, to, me, dan]","[تو, به, من, دروغ, گفتي, دن]",5,6,"{lied, me, dan, to, you}","{گفتي, تو, به, دن, دروغ, من}","[(تو, PRO), (به, P), (من, PRO), (دروغ, AJ), (گ...",[تو NP] [به PP] [من NP] [دروغ NP] [گفتي VP] دن,"[(you, PRP), (lied, VBD), (to, TO), (me, PRP),...","[(you, PRP), [[('lied', 'VBD')]], (to, TO), (m...",SOV
15,16,you told me we made payments to hollander we did,تو به من گفتي قرضمونو به هلندر پرداخت کرديم ما...,"[you, told, me, we, made, payments, to, hollan...","[تو, به, من, گفتي, قرضمونو, به, هلندر, پرداخت,...",10,12,"{hollander, told, we, me, did, to, made, you, ...","{گفتي, تو, به, هلندر, کرديم, ما, من, پرداخت, ق...","[(تو, PRO), (به, P), (من, PRO), (گفتي, V), (قر...",[تو NP] [به PP] [من NP] [گفتي VP] [قرضمونو NP]...,"[(you, PRP), (told, VBD), (me, PRP), (we, PRP)...","[(you, PRP), [[('told', 'VBD')]], (me, PRP), (...",SOV


In [4]:
# Report the total number of rows, a blank separator line, and then how many
# rows carry each word-order label.
print(len(df), '', df['Word_Order'].value_counts(), sep='\n')

76715

SOV    52756
SVO    23959
Name: Word_Order, dtype: int64


Well, we have more than twice as many SOV sentences as SVO sentences -- this is good, since Persian is underlyingly SOV. Let's separate the two orderings into their own DataFrames and then examine some of the structures that we find in each.

In [10]:
# Sort the rows by their word-order label. The sorted frame is assigned back
# to `df` rather than using inplace=True, so the step is an explicit
# reassignment and no existing object is mutated behind the reader's back.
df = df.sort_values(by=['Word_Order'])

In [12]:
# Index the rows by their word-order label. drop=False keeps 'Word_Order' as
# a regular column as well. Note that this does not reset the index: it
# replaces the previous row labels with the (non-unique) 'Word_Order' values.
# The result is assigned back to `df` instead of mutating it with inplace=True.
# NOTE(review): 'Word_Order' is now both the index name and a column label,
# which pandas treats as ambiguous in label-based calls such as
# groupby('Word_Order'); consider reset_index(drop=True) if this index is not
# actually needed -- confirm against later cells.
df = df.set_index(keys=['Word_Order'], drop=False)

In [15]:
# Split the data into one frame per word order by selecting rows with a
# boolean mask on the 'Word_Order' column.
sov_df = df.loc[df['Word_Order'] == 'SOV']
svo_df = df.loc[df['Word_Order'] == 'SVO']

In [16]:
# Check the split: show the size of each subset, then verify that together
# they account for every row of `df`. Printing alone leaves the comparison to
# the reader; the assertion makes the check fail loudly if rows are lost.
print(len(sov_df))
print(len(svo_df))
assert len(sov_df) + len(svo_df) == len(df), "SOV/SVO subsets do not cover every row of df"

52756
23959


Cool! Let's start with the SOV data first.

In [18]:
# Show the first five rows of the SOV subset.
sov_df.head(n=5)

Unnamed: 0_level_0,ID,Eng,Far,Eng_Tok,Far_Tok,Eng_Len,Far_Len,Eng_Types,Far_Types,Far_POS,Far_Chunks,Eng_POS,Eng_Chunks,Word_Order
Word_Order,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
SOV,276474,i will tell your wife he is your son,من به زنت ميگم ، اون پسر توئه,"[i, will, tell, your, wife, he, is, your, son]","[من, به, زنت, ميگم, ،, اون, پسر, توئه]",9,8,"{tell, he, your, will, i, son, wife, is}","{پسر, به, ميگم, توئه, من, زنت, اون, ،}","[(من, PRO), (به, P), (زنت, N), (ميگم, V), (،, ...",[من NP] [به PP] [زنت NP] [ميگم VP] ، [اون پسر ...,"[(i, NN), (will, MD), (tell, VB), (your, PRP$)...","[[(i, NN)], (will, MD), [[('tell', 'VB')]], (y...",SOV
SOV,273398,someones trying to shoot us,يكي ميخواد به ما شليك كنه,"[someones, trying, to, shoot, us]","[يكي, ميخواد, به, ما, شليك, كنه]",5,6,"{shoot, trying, someones, to, us}","{كنه, به, ما, شليك, يكي, ميخواد}","[(يكي, DET), (ميخواد, N), (به, P), (ما, PRO), ...",[يكي ميخواد NP] [به PP] [ما NP] [شليك VP] كنه,"[(someones, NNS), (trying, VBG), (to, TO), (sh...","[(someones, NNS), [[('trying', 'VBG')]], (to, ...",SOV
SOV,479066,sa yong's right. is there a ransom demand?,حق با سا يونگ است. ممكنه كه نقشه ديگه اي هم در...,"[sa, yong, 's, right, ., is, there, a, ransom,...","[حق, با, سا, يونگ, است, ., ممكنه, كه, نقشه, دي...",11,16,"{demand, ransom, yong, there, 's, right, sa, ?...","{ممكنه, حق, نقشه, هم, سر, اي, با, كه, در, ديگه...","[(حق, N), (با, P), (سا, N), (يونگ, N), (است, V...",[حق NP] [با PP] [سا يونگ NP] [است VP] . [ممكنه...,"[(sa, NN), (yong, PRP), ('s, POS), (right, JJ)...","[[(sa, NN)], (yong, PRP), ('s, POS), [(right, ...",SOV
SOV,273426,actually a thank you would be nice,درحقيقت يک تشكر ميتونه خوب باشه,"[actually, a, thank, you, would, be, nice]","[درحقيقت, يک, تشكر, ميتونه, خوب, باشه]",7,6,"{thank, actually, would, be, nice, you, a}","{باشه, خوب, ميتونه, تشكر, يک, درحقيقت}","[(درحقيقت, Ne), (يک, N), (تشكر, P), (ميتونه, N...",[درحقيقت يک NP] [تشكر PP] [ميتونه خوب NP] [باش...,"[(actually, RB), (a, DT), (thank, NN), (you, P...","[(actually, RB), [(a, DT), (thank, NN)], (you,...",SOV
SOV,273430,i dont want an axe thats crazy why would you o...,من يک تبر نميخوام اين احمقانه است,"[i, dont, want, an, axe, thats, crazy, why, wo...","[من, يک, تبر, نميخوام, اين, احمقانه, است]",14,7,"{want, an, thats, crazy, why, offer, dont, wou...","{احمقانه, تبر, نميخوام, اين, من, است, يک}","[(من, PRO), (يک, NUM), (تبر, N), (نميخوام, V),...",[من NP] [يک تبر NP] [نميخوام VP] [اين احمقانه ...,"[(i, NNS), (dont, VBP), (want, VBP), (an, DT),...","[(i, NNS), [[('dont', 'VBP')]], [[('want', 'VB...",SOV


Now the SVO data.

In [19]:
# Show the first five rows of the SVO subset.
svo_df.head(n=5)

Unnamed: 0_level_0,ID,Eng,Far,Eng_Tok,Far_Tok,Eng_Len,Far_Len,Eng_Types,Far_Types,Far_POS,Far_Chunks,Eng_POS,Eng_Chunks,Word_Order
Word_Order,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
SVO,434130,what the hell are you doing?,تو داري چه غلطي ميكني؟,"[what, the, hell, are, you, doing, ?]","[تو, داري, چه, غلطي, ميكني؟]",7,5,"{doing, the, what, ?, hell, you, are}","{غلطي, تو, چه, ميكني؟, داري}","[(تو, PRO), (داري, V), (چه, DET), (غلطي, N), (...",[تو NP] [داري VP] [چه غلطي ميكني؟ NP],"[(what, WP), (the, DT), (hell, NN), (are, VBP)...","[(what, WP), [(the, DT), (hell, NN)], [[('are'...",SVO
SVO,501157,as you know this is a war to drive the han arm...,شما مي دونيد اين يه جنگ هست که مي خوايم ارتش ه...,"[as, you, know, this, is, a, war, to, drive, t...","[شما, مي, دونيد, اين, يه, جنگ, هست, که, مي, خو...",17,21,"{han, they, out, drive, as, the, after, oppres...","{از, بيرون, بعد, ظلمي, يه, اونا, شما, را, کنيم...","[(شما, PRO), (مي, N), (دونيد, V), (اين, N), (ي...",[شما NP] [مي دونيد VP] [اين NP] [يه PP] [جنگ N...,"[(as, IN), (you, PRP), (know, VBP), (this, DT)...","[[(as, IN)], (you, PRP), [[('know', 'VBP')], [...",SVO
SVO,493066,i will do whatever your highness say but if so...,هر چه بخواهيد انجام مي دم اما اگر اتفاقي براي ...,"[i, will, do, whatever, your, highness, say, b...","[هر, چه, بخواهيد, انجام, مي, دم, اما, اگر, اتف...",14,12,"{do, baby, your, whatever, if, say, highness, ...","{هر, انجام, مي, دم, اگر, بخواهيد, بيافته, چه, ...","[(هر, DET), (چه, N), (بخواهيد, V), (انجام, Ne)...",[هر چه NP] [بخواهيد VP] [انجام مي NP] [دم VP] ...,"[(i, NN), (will, MD), (do, VB), (whatever, WDT...","[[(i, NN)], (will, MD), [[('do', 'VB')]], (wha...",SVO
SVO,493312,general heuk chi has requested to see you.,ژنرال هيوك چي تقاضا داره شما را ببينه,"[general, heuk, chi, has, requested, to, see, ...","[ژنرال, هيوك, چي, تقاضا, داره, شما, را, ببينه]",9,8,"{requested, general, chi, to, you, see, has, h...","{ببينه, ژنرال, چي, شما, را, تقاضا, هيوك, داره}","[(ژنرال, N), (هيوك, CONJ), (چي, DET), (تقاضا, ...",[ژنرال هيوك چي تقاضا NP] [داره VP] [شما NP] [ر...,"[(general, JJ), (heuk, NN), (chi, NN), (has, V...","[[(general, JJ), (heuk, NN), (chi, NN)], [[('h...",SVO
SVO,425438,i think they can be the alternatives for you.,فكر كنم اونها تنها چار? تو باشند,"[i, think, they, can, be, the, alternatives, f...","[فكر, كنم, اونها, تنها, چار, ?, تو, باشند]",10,8,"{for, the, be, can, ., they, i, you, think, al...","{اونها, تنها, تو, چار, ?, فكر, باشند, كنم}","[(فكر, N), (كنم, V), (اونها, PRO), (تنها, ADV)...",[فكر NP] [كنم VP] [اونها NP] [تنها چار NP] ? [...,"[(i, NN), (think, VBP), (they, PRP), (can, MD)...","[[(i, NN)], [[('think', 'VBP')]], (they, PRP),...",SVO
