In [7]:
import pandas as pd
from textblob import TextBlob
import spacy
import numpy as np

nlp = spacy.load("en_core_web_sm")

In [8]:
df = pd.read_csv('Raw_data/lines.csv')

In [9]:
df

Unnamed: 0,name,ep_szn,title,lines
0,The boys,101,"""Cartman Gets an Anal Probe""","School days, school days, teacher's golden ru...."
1,Kyle Broflovski,101,"""Cartman Gets an Anal Probe""","Ah, damn it! My little brother's trying to fol..."
2,Ike Broflovski,101,"""Cartman Gets an Anal Probe""",Ee gko zeeponanner.
3,Kyle,101,"""Cartman Gets an Anal Probe""","Ike, you can't come to school with me. [Ike ch..."
4,Eric Cartman,101,"""Cartman Gets an Anal Probe""","Yeah, go home you little dildo!"
...,...,...,...,...
86939,Sharon,2606,"""Spring Break""","Stanley, what have I told you about playing ""W..."
86940,Stan,2606,"""Spring Break""",Aw!
86941,Garrison,2606,"""Spring Break""","Oh, Rick!"
86942,Garrison,2606,"""Spring Break""",I don't want to go through all that again. I j...


In [10]:
df[df.name == 'Eric']

Unnamed: 0,name,ep_szn,title,lines
22141,Eric,415,"""Fat Camp""","[pissed off] Aw, damnit!"


# Pre-processing

* Inspect NaN values and decide what to do with these rows
* Split strings so that we're not just dealing with one big string for each line
* Remove punctuation from all of the lines

In [11]:
df.isna().sum()

name      31
ep_szn     0
title      0
lines      6
dtype: int64

In [12]:
# Let's have a look at some..
# We can see we have some lines with no speaker and also some "stage direction" type of comments

df[df.name.isna()].head(5)

Unnamed: 0,name,ep_szn,title,lines
53949,,1212,"""About Last Night...""","Boom, baby!"
75959,,2008,"""Members Only""",[the machine closes enseals Garrison]
75968,,2008,"""Members Only""","[Cartman, Heidi, and the secretary turn their ..."
76089,,2008,"""Members Only""",[everyone continues walking]
76213,,2009,"""Not Funny""",[crowd stops chattering]


In [13]:
# Drop every row that is missing the speaker name or the line text (either one is enough
# to discard the row), then renumber the index from 0.

df = df.dropna(subset=['name', 'lines']).reset_index(drop=True)

In [14]:
# Check it's worked

df.isna().sum()

name      0
ep_szn    0
title     0
lines     0
dtype: int64

In [15]:
df.loc[0, 'lines']

"School days, school days, teacher's golden ru... [Ike runs to the team]"

In [16]:
# Before we go ahead and strip all punctuation we want to remove all of the "stage direction" stuff,
# i.e. any bracketed text such as "[Ike runs to the team]".
# The result is assigned back to the column: calling replace(..., inplace=True) on df['lines']
# works on an intermediate object (pandas warns it will never update df in pandas 3.0).
# The pattern is a raw string so that "\[" reaches the regex engine without an invalid-escape warning.

df['lines'] = df['lines'].replace(to_replace=r"\[.*?]", value="", regex=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['lines'].replace(to_replace="\[.*?]", value="", regex=True, inplace=True)


In [17]:
# Let's check if it worked

df.loc[0, 'lines']

"School days, school days, teacher's golden ru... "

In [18]:
df[df.lines == " God?? I can't do that. I haven't spoken to God in like, 5000 years."]

Unnamed: 0,name,ep_szn,title,lines
20680,Satan,411,"""Probably""",God?? I can't do that. I haven't spoken to Go...


In [19]:
# Split each line on whitespace into a list of tokens. Punctuation stays attached to the
# tokens at this point (e.g. 'days,'); it is stripped in a later step.
df['string_split'] = df['lines'].str.split()

In [20]:
df.loc[0, 'string_split']

['School', 'days,', 'school', 'days,', "teacher's", 'golden', 'ru...']

In [21]:
# All the punctuation characters to remove. The apostrophe is not in this set, so
# contractions such as "can't" keep theirs. The backslash is written as an explicit "\\"
# escape: a bare "\]" is an invalid escape sequence that newer Python versions warn about.
English_punctuation = "!\"#$%&()*+,./:;<=>?@[\\]^_`{|}~“”-"
print(English_punctuation)                                     # Print that defined variable, just to check it is correct.
print("...")

# Translation table mapping every punctuation character to None. Built once here rather
# than on every call, because remove_punctuation is applied to every row of the dataset.
_PUNCTUATION_TABLE = str.maketrans('', '', English_punctuation)


def remove_punctuation(from_text):
    """Strip the characters in English_punctuation from every string in from_text.

    Parameters
    ----------
    from_text : iterable of str
        The tokens of one line (e.g. the output of str.split()).

    Returns
    -------
    list of str
        One string per input token, in the same order. A token made up only of
        punctuation becomes an empty string; it is not dropped from the list.
    """
    return [w.translate(_PUNCTUATION_TABLE) for w in from_text]

!"#$%&()*+,./:;<=>?@[\]^_`{|}~“”-
...


In [22]:
# Run remove_punctuation over each row's token list to get the punctuation-free tokens.
df['no_punct'] = df['string_split'].map(remove_punctuation)

# Extract names from lines

What we want to do now is find a way to pull out every name from the lines in our dataset. 
Luckily for us when we did our webscraping, the webpage was formatted in a way that the names were easy to pull out into their own column. 

So, below, you'll see that what we do is create a variable which contains a list of all our unique names in the dataset. We then create 2 empty lists:

* match = will hold the names that match those in our unique names list
* index = will hold the indexes that pertain to the row of the dataset we are iterating through

You can see we also have a counter which is set to -1, because our index starts at 0. 
Then we have a series of for loops:

* first for loop iterates through each list of split strings in the dataset
* second for loop then iterates through each individual string/word within each list
* third for loop then iterates through each unique name in our list of unique names

Then we have an if statement which checks if the individual string matches any name in the list of names.
If it does we then append the name to our empty match list, along with its index.

In [23]:
# Every distinct speaker name, in order of first appearance in the dataset.
names = df['name'].unique().tolist()

In [24]:
names[:5]

['The boys', 'Kyle Broflovski', 'Ike Broflovski', 'Kyle', 'Eric Cartman']

Executing the cell below may take a few minutes as it's iterating through 86907 lines which contain 11 strings on average.

In [25]:
# Find every token that is exactly equal to one of the speaker names.
#   match[k] -> the name that was found
#   index[k] -> the 0-based position in df of the line it was found in
# A line adds one entry per matching token, so the same position can appear several times.
#
# `names` holds unique values, so testing membership in a set finds exactly the matches that
# comparing each word against every name in turn would find, with one lookup per word
# instead of one comparison per name.
# Names that contain a space (e.g. 'Eric Cartman') can never equal a single token, so only
# single-word names are ever matched here.
name_lookup = set(names)
match = []
index = []

for counter, lines in enumerate(df.no_punct):
    for word in lines:
        if word in name_lookup:
            match.append(word)
            index.append(counter)

                

In [26]:
# Let's check out our match list

match[:10]

['Ike',
 'Dude',
 'Cartman',
 'Kenny',
 'Dude',
 'Ike',
 'Cartman',
 'Dude',
 'Cartman',
 'Chef']

In [27]:
# Let's look at our index list

index[:10]

[3, 5, 7, 11, 14, 15, 18, 28, 31, 37]

In [28]:
# Seems right! Although may need to deal with "Dude"

df.loc[index[:10]]

Unnamed: 0,name,ep_szn,title,lines,string_split,no_punct
3,Kyle,101,"""Cartman Gets an Anal Probe""","Ike, you can't come to school with me.","[Ike,, you, can't, come, to, school, with, me.]","[Ike, you, can't, come, to, school, with, me]"
5,Kyle,101,"""Cartman Gets an Anal Probe""","Dude, don't call my brother a dildo!","[Dude,, don't, call, my, brother, a, dildo!]","[Dude, don't, call, my, brother, a, dildo]"
7,Kyle,101,"""Cartman Gets an Anal Probe""","Well, I don't know... and I'll bet Cartman doe...","[Well,, I, don't, know..., and, I'll, bet, Car...","[Well, I, don't, know, and, I'll, bet, Cartman..."
11,Stan,101,"""Cartman Gets an Anal Probe""","What's a dildo, Kenny?","[What's, a, dildo,, Kenny?]","[What's, a, dildo, Kenny]"
14,Stan,101,"""Cartman Gets an Anal Probe""","Dude, that kicks ass!","[Dude,, that, kicks, ass!]","[Dude, that, kicks, ass]"
15,Kyle,101,"""Cartman Gets an Anal Probe""","Yeah, check this one out. Ready Ike? Kick the ...","[Yeah,, check, this, one, out., Ready, Ike?, K...","[Yeah, check, this, one, out, Ready, Ike, Kick..."
18,Stan,101,"""Cartman Gets an Anal Probe""","Whoa, Cartman! Looks like you didn't get much ...","[Whoa,, Cartman!, Looks, like, you, didn't, ge...","[Whoa, Cartman, Looks, like, you, didn't, get,..."
28,Stan,101,"""Cartman Gets an Anal Probe""",Dude! Visitors!,"[Dude!, Visitors!]","[Dude, Visitors]"
31,Stan,101,"""Cartman Gets an Anal Probe""","That wasn't a dream Cartman, those were visitors!","[That, wasn't, a, dream, Cartman,, those, were...","[That, wasn't, a, dream, Cartman, those, were,..."
37,Boys,101,"""Cartman Gets an Anal Probe""","Hey, Chef.","[Hey,, Chef.]","[Hey, Chef]"


In [29]:
# Let's create our filtered dataset using our indexes
# `index` holds the loop counter values, which .loc looks up as index labels; the two agree
# because df's index was reset to 0..n-1 after the NaN rows were dropped.
# `index` can list the same row more than once (one entry per matched word), so the result
# can contain repeated rows that share an index label.

filtered = df.loc[index]

In [38]:
# Let's assign our list of matches to a new column
# `match` and `index` were appended to together, so match[k] is the name found in the
# k-th row of `filtered`.

filtered['matches'] = match

In [53]:
filtered[filtered.name == "Mom"]

Unnamed: 0,name,ep_szn,title,lines,string_split,no_punct,matches
18798,Mom,406,"""Cartman Joins NAMBLA""","You can't eat, Kenny. We have to save food for...","[You, can't, eat,, Kenny., We, have, to, save,...","[You, can't, eat, Kenny, We, have, to, save, f...",Kenny
18800,Mom,406,"""Cartman Joins NAMBLA""","Kenny, you have to change the baby's diapers!","[Kenny,, you, have, to, change, the, baby's, d...","[Kenny, you, have, to, change, the, baby's, di...",Kenny
33034,Mom,702,"""Krazy Kripples""","There you are, Jimmy!","[There, you, are,, Jimmy!]","[There, you, are, Jimmy]",Jimmy
35833,Mom,712,"""All About Mormons""","Hey, it's Gary!","[Hey,, it's, Gary!]","[Hey, it's, Gary]",Gary
42236,Mom,907,"""Erection Day""","Shauna honey, I think your little date is here.","[Shauna, honey,, I, think, your, little, date,...","[Shauna, honey, I, think, your, little, date, ...",Shauna
43314,Mom,911,"""Ginger Kids""","Clyde? What is it, honey?","[Clyde?, What, is, it,, honey?]","[Clyde, What, is, it, honey]",Clyde
49521,Mom,1108,"""Le Petit Tourette""","A-alright Thomas, maybe we should go?","[A-alright, Thomas,, maybe, we, should, go?]","[Aalright, Thomas, maybe, we, should, go]",Thomas
49529,Mom,1108,"""Le Petit Tourette""","Oh, a-I'm sorry. My... son Thomas has Tourett...","[Oh,, a-I'm, sorry., My..., son, Thomas, has, ...","[Oh, aI'm, sorry, My, son, Thomas, has, Touret...",Thomas
49532,Mom,1108,"""Le Petit Tourette""",People with Tourette's can't control certain t...,"[People, with, Tourette's, can't, control, cer...","[People, with, Tourette's, can't, control, cer...",People
49536,Mom,1108,"""Le Petit Tourette""","It's okay, Thomas. They understand.","[It's, okay,, Thomas., They, understand.]","[It's, okay, Thomas, They, understand]",Thomas


In [57]:
df[df.name == "Eric"]

Unnamed: 0,name,ep_szn,title,lines,string_split,no_punct
22139,Eric,415,"""Fat Camp""","Aw, damnit!","[Aw,, damnit!]","[Aw, damnit]"


In [55]:
filtered[filtered.name.str.contains("American")]

Unnamed: 0,name,ep_szn,title,lines,string_split,no_punct,matches
34431,Native American chief,707,"""Red Man's Greed""",Welcome to the Three Feathers Casino. I'm your...,"[Welcome, to, the, Three, Feathers, Casino., I...","[Welcome, to, the, Three, Feathers, Casino, I'...",Chief
45110,American male 1,1004,"""Cartoon Wars Part II""","Hello, I am American.","[Hello,, I, am, American.]","[Hello, I, am, American]",American
45111,American male 2,1004,"""Cartoon Wars Part II""",I'm American too.,"[I'm, American, too.]","[I'm, American, too]",American
45115,American Female,1004,"""Cartoon Wars Part II""","I'm American. I'm pregnant with a baby, but I...","[I'm, American., I'm, pregnant, with, a, baby,...","[I'm, American, I'm, pregnant, with, a, baby, ...",American
72508,Three students dressed in Native American garb,1902,"""Where My Country Gone?""","We are the Hopewell and Abenaki, first settler...","[We, are, the, Hopewell, and, Abenaki,, first,...","[We, are, the, Hopewell, and, Abenaki, first, ...",Canadian


In [43]:
filtered.matches.value_counts().nlargest(50)

matches
Kyle         2140
God          1797
All          1533
Butters      1429
Cartman      1356
Stan         1324
Dude         1193
Eric         1163
Kenny        1043
Jesus         816
Dad           619
Mom           525
Randy         464
Chef          399
Right         348
Garrison      345
Wendy         340
President     321
Ike           318
Tolkien       317
Jimmy         307
Tom           306
Everyone      290
Phillip       272
Terrance      270
People        263
Clyde         254
Craig         249
Stanley       249
woman         240
Sharon        238
Timmy         220
Tweek         202
Hat           189
Scott         185
TV            182
Mackey        168
Heidi         165
Man           162
American      162
Red           152
Boys          141
Principal     138
Mayor         133
New           126
Canadian      125
Bebe          124
Shelly        123
Ned           122
Satan         115
Name: count, dtype: int64

In [24]:
# We have duplicate indexes which is annoying

filtered[filtered.lines == " God?? I can't do that. I haven't spoken to God in like, 5000 years."]

Unnamed: 0,name,ep_szn,title,lines,string_split,no_punct,matches
20680,Satan,411,"""Probably""",God?? I can't do that. I haven't spoken to Go...,"[God??, I, can't, do, that., I, haven't, spoke...","[God, I, can't, do, that, I, haven't, spoken, ...",God
20680,Satan,411,"""Probably""",God?? I can't do that. I haven't spoken to Go...,"[God??, I, can't, do, that., I, haven't, spoke...","[God, I, can't, do, that, I, haven't, spoken, ...",God


In [25]:
# Let's check it out
# Looks like we should reset our index!

filtered[['lines', 'matches']].head()

Unnamed: 0,lines,matches
3,"Ike, you can't come to school with me.",Ike
5,"Dude, don't call my brother a dildo!",Dude
7,"Well, I don't know... and I'll bet Cartman doe...",Cartman
11,"What's a dildo, Kenny?",Kenny
14,"Dude, that kicks ass!",Dude


In [26]:
filtered[filtered.lines == " God?? I can't do that. I haven't spoken to God in like, 5000 years."]

Unnamed: 0,name,ep_szn,title,lines,string_split,no_punct,matches
20680,Satan,411,"""Probably""",God?? I can't do that. I haven't spoken to Go...,"[God??, I, can't, do, that., I, haven't, spoke...","[God, I, can't, do, that, I, haven't, spoken, ...",God
20680,Satan,411,"""Probably""",God?? I can't do that. I haven't spoken to Go...,"[God??, I, can't, do, that., I, haven't, spoke...","[God, I, can't, do, that, I, haven't, spoken, ...",God


In [27]:
# Let's reset our index

filtered = filtered.reset_index(drop = True)

In [28]:
# Now we don't have duplicate indexes!

filtered[filtered.lines == " God?? I can't do that. I haven't spoken to God in like, 5000 years."]

Unnamed: 0,name,ep_szn,title,lines,string_split,no_punct,matches
7643,Satan,411,"""Probably""",God?? I can't do that. I haven't spoken to Go...,"[God??, I, can't, do, that., I, haven't, spoke...","[God, I, can't, do, that, I, haven't, spoken, ...",God
7644,Satan,411,"""Probably""",God?? I can't do that. I haven't spoken to Go...,"[God??, I, can't, do, that., I, haven't, spoke...","[God, I, can't, do, that, I, haven't, spoken, ...",God


# Nearly done...

Now all we need to do in order to make this dataset network-ready is to compute the number of times each character mentions another character.

In [29]:
# Count how often each (speaker, mentioned name) pair occurs. reset_index turns the result
# into a flat frame with the columns name, matches and count, most frequent pair first.
counts = filtered[['name', 'matches']].value_counts().reset_index()

In [58]:
counts

Unnamed: 0,name,matches,count
0,Cartman,Kyle,1011
1,Cartman,Butters,567
2,Kyle,Cartman,564
3,Stan,Dude,495
4,Cartman,Kenny,401
...,...,...,...
10132,Memberberry 4,Mickey,1
10133,Memberberry 3,Death,1
10134,Memberberry 2,Tubbs,1
10135,Memberberry 1,Member,1


In [62]:
# Speaker-by-mentioned-name matrix: rows are `name`, columns are `matches`, and pairs that
# never occur are filled with 0. No aggfunc is passed, so pivot_table uses its default (mean);
# each (name, matches) pair is a single row of `counts`, so each cell is just that pair's count.
pivot_table = counts.pivot_table(index = 'name', columns = 'matches', values = 'count', fill_value = 0)

In [63]:
pivot_table

matches,Adam,Adams,Adolf,Adults,Agent,Aguilar,Al,Alan,Alarm,Alec,...,Yolanda,YourMomsTits,Zazul,Zeus,Zombie,Zytar,driver,instructor,woman,worker
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"""Ash""",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"""Beaky Buzzard""",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"""Brokaw""",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"""Cupid Ye""",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"""Droopy Dawg""",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Zazul,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Zeus,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
f*ther Maxi,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
instructor,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [71]:
import os

os.getcwd()

'/Users/loucap/Documents/GitWork/SNA'

# Save matrix and counts df to csv

In [72]:
counts.to_csv('Data/counts.csv', index = False)

In [73]:
# Keep the index when saving: the pivot table's index holds the speaker names (the row
# labels of the matrix). Writing with index=False would drop them and leave rows that
# cannot be tied back to a speaker.
pivot_table.to_csv('Data/pivottab.csv')

In [38]:
# Every name that appears as a speaker, as a mentioned name, or both.
all_characters = set(counts['name']) | set(counts['matches'])

In [39]:
len(all_characters)

2586

In [41]:
from nxviz import MatrixPlot

In [42]:
import networkx as nx
import matplotlib.pyplot as plt

In [43]:
# G = nx.DiGraph(out)

In [44]:
# m = MatrixPlot(G)


In [45]:
# Convert the pivoted DataFrame to a directed graph
# G = nx.from_pandas_adjacency(pivoted_2, create_using=nx.DiGraph)

# Create a MatrixPlot object
# m = MatrixPlot(G)

# Display the plot
# plt.figure(figsize=(8, 8))  # Optional: Set the plot size
# m.draw()
# plt.show()

# Rudimentary network

In [None]:
import networkx as nx
import matplotlib.pyplot as plt
import xlrd

In [None]:
# Start an empty undirected graph and build the (speaker, mentioned name) pairs to use as edges.
G = nx.Graph()
# NOTE(review): this rebinds `names`, which earlier held the list of unique speaker names,
# to an empty list that is not used in this cell.
names = []

# NOTE(review): `red` is not defined anywhere in the visible notebook. Presumably it is a
# reduced version of `counts` with `name` and `matches` columns; confirm and define it.
zipped = list(zip(red['name'], red['matches']))

In [None]:
G.add_edges_from(zipped)

In [None]:
nx.draw(G, with_labels = True)

In [None]:
plt.show()

# Networkx

In [None]:
b[(b.name == 'Cartman') & (b.matches == 'Butters')]

In [None]:
# Build a graph with one edge per (name, matches) row, storing `count` as an edge attribute.
# NOTE(review): `b` is not defined in the visible notebook before this cell. Presumably it is
# the `counts` frame (columns name, matches, count); confirm.
# NOTE(review): nx.Graph() is undirected, so "A mentions B" and "B mentions A" become the same
# edge and only one of the two counts is kept. Confirm whether a DiGraph was intended.
G2 = nx.from_pandas_edgelist(b, source = "name", target = "matches", edge_attr = "count", create_using = nx.Graph())

In [None]:
# Position the nodes of G2 with the Kamada-Kawai layout and draw the labelled graph
# on a 10 x 10 inch figure.
# NOTE(review): no edge_color values are passed, so edge_cmap presumably has no visible
# effect; confirm.
pos = nx.kamada_kawai_layout(G2)
plt.figure(figsize=(10, 10))
nx.draw(
    G2,
    pos=pos,
    with_labels=True,
    node_color='skyblue',
    node_size=150,
    font_size=8,
    edge_cmap=plt.cm.Blues,
)
plt.show()

In [None]:
total_nodes = pd.concat([b['name'], b['matches']]).nunique()
print(total_nodes)

In [None]:
top_counts = b[b['count'] > 10]

In [None]:
len(top_counts)

In [None]:
total_nodes = pd.concat([top_counts['name'], top_counts['matches']]).nunique()
print(total_nodes)

In [None]:
# Same construction as G2, restricted to the pairs kept in `top_counts`.
# NOTE(review): nx.Graph() is undirected, so reciprocal pairs (A mentions B, B mentions A)
# become the same edge and only one `count` is kept. Confirm this is intended.
G3 = nx.from_pandas_edgelist(top_counts, source = "name", target = "matches", edge_attr = "count", create_using = nx.Graph())

In [None]:
# Position the nodes of G3 with the Kamada-Kawai layout and draw the labelled graph
# on a 10 x 10 inch figure.
# NOTE(review): no edge_color values are passed, so edge_cmap presumably has no visible
# effect; confirm.
pos = nx.kamada_kawai_layout(G3)
plt.figure(figsize=(10, 10))
nx.draw(
    G3,
    pos=pos,
    with_labels=True,
    node_color='skyblue',
    node_size=150,
    font_size=8,
    edge_cmap=plt.cm.Blues,
)
plt.show()

In [None]:
nx.degree_centrality(G3)

In [None]:
# NOTE(review): hard-coded numbers. Presumably the percentage of the 10137 (name, matches)
# pairs that fall at or below the count threshold used for `top_counts`; confirm, and
# compute it from the frames instead of typing the values in.
9759 / 10137 * 100

In [None]:
# from pyvis.network import Network
# net = Network(notebook = True, width="1000px", height= "700px", bgcolor = "#222222", font_color='white')
# net.from_nx(G2)
# net.show("southpark.html")

# X_array stuff

In [None]:
# xarray

ds = filtered.to_xarray()

In [None]:
# NOTE(review): the result of set_coords is only displayed here, not assigned back to `ds`.
# NOTE(review): 'n_mentions' is not a column that any visible cell adds to `filtered`;
# confirm where it is supposed to come from.
ds.set_coords(['name', 'matches', 'n_mentions'])

In [None]:
# Print every element of `na` that is exactly the string 'Sorry'.
# NOTE(review): `na` is not defined anywhere in the visible notebook; confirm what it should be.
for i in na:
    if i == 'Sorry':
        print(i)

Things to consider:

The boys use "Dude" a lot throughout to refer to each other, but in a few of the episodes (3) there is a generic "Dude" character, and he does reference others. Should we filter these out?

We also need to filter out "Sorry". It appears once in the `name` column (TODO: note which episode).

In [None]:
df_c[df_c.name == 'Sorry'].head(70)

In [None]:
pd.set_option('display.max_rows', 500)

df_c[df_c.ep_szn == 218]

# Extract named entities

In [None]:
# Remove bracketed "stage direction" text such as "[crowd stops chattering]" from every line.
# The result is assigned back to the column: calling replace(..., inplace=True) on
# df_c['lines'] works on an intermediate object and pandas warns that it will never update
# the frame in pandas 3.0. The pattern is a raw string so that "\[" is not an invalid escape.
df_c['lines'] = df_c['lines'].replace(to_replace=r"\[.*?]", value="", regex=True)

In [None]:
df_c['lines'][98]

In [None]:
# Plain Python list holding the text of every line.
# NOTE(review): the Networkx cells above use `b` as a DataFrame with name / matches / count
# columns; this rebinds `b` to a list, so those cells will not work if they are re-run after it.
b = df_c.lines.tolist()

In [None]:
# Collect the text of every entity spaCy finds in the first five lines.
# A line with no entities contributes the placeholder string "None".
ent_list = []

for i in b[:5]:
    print("aye", i)
    doc = nlp(i)
    # An `else` attached to the `for` loop would run after every loop that finishes without
    # `break`, i.e. for every line, so the placeholder has to be guarded by an explicit test.
    if doc.ents:
        for entity in doc.ents:
            ent_list.append(entity.text)
    else:
        ent_list.append("None")
        

In [None]:
ent_list

In [None]:
# One record per row of df_c: the speaker, the raw line, and the texts of the entities
# that spaCy labels PERSON in that line.
sent_ent = []

# nlp.pipe runs the texts through the pipeline in batches and yields the Docs in input
# order, so zipping it with the two columns keeps each Doc aligned with its row.
# The per-entity label print that was used while debugging is left out: it wrote one line
# of output for every entity in the whole dataset.
for line, char, doc in zip(df_c.lines, df_c.name, nlp.pipe(df_c.lines)):
    entity_list = [ent.text for ent in doc.ents if ent.label_ == 'PERSON']
    sent_ent.append({"name": char, "line": line, "entities": entity_list})

In [None]:
sent_ent

In [None]:
for i in sent_ent[:10]:
    print(i['entities'])

In [None]:
for i in sent_ent[:3]:
    print(i)

In [None]:
df_c.name

In [None]:
entity_list

In [None]:
# `sent_ent` holds one record per row of df_c, in row order, so its 'entities' lists line up
# with the frame. `entity_list` only holds the PERSON entities of the last line processed,
# so it cannot be used to fill the whole column.
df_c['Entities'] = [record['entities'] for record in sent_ent]

In [None]:
na_removed

In [None]:
# Score every line with TextBlob and keep the polarity, which is the first field of the
# sentiment result.
na_removed['line_sent'] = na_removed['lines'].map(lambda text: TextBlob(text).sentiment.polarity)

In [None]:
na_removed

In [None]:
cartman = na_removed[na_removed.name == 'Cartman']

In [None]:
cartman

In [None]:
cartman.line_sent.mean()

In [None]:
cartman.line_sent.sum() / len(cartman)

In [None]:
zero_removed = cartman[cartman.line_sent != 0.0000]

In [None]:
zero_removed.line_sent.mean()

In [None]:
kyle = na_removed[na_removed.name == 'Kyle']

In [None]:
kyle.line_sent.sum() / len(kyle)

In [None]:
wendy = na_removed[na_removed.name == 'Wendy']

In [None]:
wendy

In [None]:
wendy.line_sent.sum() / len(wendy)

In [None]:
# Keep only the lines whose sentiment score is not exactly zero.
# NOTE(review): `sheila` is not defined anywhere in the visible notebook; presumably it should
# be built like the other per-character frames (na_removed filtered by name). Confirm.
zero_removed = sheila[sheila.line_sent != 0.0000]

In [None]:
zero_removed.line_sent.sum() / len(zero_removed)

In [None]:
# NOTE(review): `kenny` is only assigned in the next cell, so on a fresh kernel this cell
# raises NameError when the notebook is run top to bottom.
kenny

In [None]:
kenny = na_removed[na_removed.name == 'Kenny']

In [None]:
kenny.line_sent.mean()

In [None]:
zero_removed = kenny[kenny.line_sent != 0.0000]

In [None]:
zero_removed.line_sent.mean()

In [None]:
garrison = na_removed[na_removed.name == 'Garrison']

In [None]:
garrison

In [None]:
garrison.line_sent.mean()

In [None]:
# Mean sentiment score per speaker.
# A single groupby replaces re-filtering the whole frame once per unique name.
# sort=False keeps the speakers in order of first appearance, the order unique() gives.
# NOTE(review): groupby skips rows whose name is missing; assumes na_removed has none. Confirm.
mean_by_name = na_removed.groupby('name', sort=False)['line_sent'].mean()

names = mean_by_name.index.tolist()
means = mean_by_name.tolist()
    

In [None]:
# Two-column summary frame: one row per speaker with that speaker's mean sentiment score.
# NOTE(review): this rebinds `df`, which earlier in the notebook held the full lines dataset.
df = pd.DataFrame({'Name': names, 'Mean': means})

In [None]:
df.sort_values(by = 'Mean', ascending = False)[:50]

In [None]:
import sys
import numpy
# Raise numpy's summarisation threshold to the maximum so arrays are printed in full
# instead of being shortened with "...".
numpy.set_printoptions(threshold=sys.maxsize)

# NOTE(review): `df` was rebound above to the summary frame with the columns 'Name' and
# 'Mean', which has no `name` column. This looks like it expects the original lines
# dataset; confirm which frame is meant.
df.name.unique()