# Mini-Project 3: Classification
Jimmy Sisenglath, Eric Pham, Justin Eugenio

In [1]:
# Import libraries.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import sklearn.feature_extraction.text as sk_text

from sklearn import tree # Decision Tree.
from sklearn import metrics
from sklearn.svm import SVC # Support Vector Machine.
from sklearn.preprocessing import OneHotEncoder # One-hot encoding.
from sklearn.neighbors import KNeighborsClassifier # k-NN
from sklearn.linear_model import LogisticRegression # Logistic Regression.
from sklearn.preprocessing import StandardScaler # Neural networks.
from sklearn.neural_network import MLPClassifier # Neural networks.
from sklearn.model_selection import cross_val_score # Cross validation.
from sklearn.model_selection import train_test_split, GridSearchCV # Grid search.

# 1. Classification of Twitter Users
- In this project you will use the **original data you used in Project 1**. You will practice with algorithms for classification. **The goal of this project is to create classification models that predict if a user is a follower of Trump or Clinton.** In the file “clinton_trump_user_classes.txt”, we have the ground truth “class” membership for each user id in the data. Class 0 corresponds to Trump followers, while class 1 corresponds to Clinton followers.

In [2]:
# Read the 2016 Presidential election tweets and assigned column names. 

# Produces the tweet column name title on top of the table. Record type.
columns = ['Name', 'ScreenName', 'UserID', 'FollowersCount', 'FriendsCount','Location', 'Description', 'CreatedAt', 'StatusID', 'Language', 'Place', 'RetweetCount','FavoriteCount', 'Text']

# Old dataframe. Use the original data you used in Project 1.
tweets = pd.read_table('clinton_trump_tweets.txt', header=None, names=columns, encoding="ISO-8859-1")
pd.set_option('display.max_colwidth', -1) # Shows the whole message in the records.

# Prepare ground truth. Predict if a user is a follower of "Trump or Clinton (Supporter)".
# ground_truth = pd.read_table('data/clinton_trump_user_classes.txt', names=["UserID", "Supporter"], dtype=int, encoding ="ISO-8859-1")

# Show first 5 records.
tweets.head()

Unnamed: 0,Name,ScreenName,UserID,FollowersCount,FriendsCount,Location,Description,CreatedAt,StatusID,Language,Place,RetweetCount,FavoriteCount,Text
0,Cebel,Cebel6,1519696717,132,263,"Little Rock, Arkansas",Arkansas Razorback Fan Just trying to be #Uncommon one 1-0 day at a time.,Sat Oct 29 08:10:06 EEST 2016,792232017094119425,en,,0,1,@NWAJimmy I've read it now though brother. Was pretty spot on Lots of bright spots but a lot to work on. Exactly as an exhibition should be!
1,Cookie,Cookiemuffen,109945090,2154,2034,The American South,Got married after college. I don't regret starting a family instead of grad school. Proud Deplorable,Wed Oct 26 18:44:08 EEST 2016,791304413923213312,en,,1937,0,RT @wikileaks: New poll puts Pirate Party on course to win Iceland's national elections on Saturday. https://t.co/edTqjeJaQ6
2,nolaguy,nolaguy_phd,1450086582,797,1188,,"An LSU Ph.D student living in New Orleans, trying to find a second act.",Sat Oct 29 21:53:29 EEST 2016,792439227090767872,en,,0,0,@gaystoner821 I think New Orleans spoiled me with food. I need to try and branch out in BR.
3,Mark Hager,marksnark,167177185,204,448,Pittsburgh,"Hip, trendy, smart, funny, fit, lobbyist. U? Boilerplate: these thoughts are my own, not anyone else's. Hmmmkay?",Wed Oct 26 00:33:20 EEST 2016,791029904733331457,en,,891,0,RT @LOLGOP: ACA needs fixes but know da facts: *70% can get covered in marketplaces for under $75/month *Hikes affect 3% *GOP will uninsu
4,Capitalist Creations,aaronjhoddinott,1191022351,775,154,Canada,"Entrepreneur, startup investor, political junkie, free market supporter, beer connoisseur, dad and dog lover. Also a golf enthusiast despite my lack of skill.",Fri Oct 28 05:05:10 EEST 2016,791823089700962304,en,,7,0,RT @FastCompany: Alphabet shares soar on better-than-expected earnings as mobile video strategy pays off https://t.co/bokbXngMJt https://t.


In [3]:
# Drop unused columns.
tweets = tweets.drop(columns=['Name','ScreenName','FollowersCount','FriendsCount', 'Description','CreatedAt','StatusID','Language','Place','RetweetCount','FavoriteCount'])

In [4]:
# Drop null values in 'Location'.
# dropna: remove missing values.
tweets = tweets.dropna(subset=['Location'])

# Task 1.1
- Remove all retweets first. Remove all users that have less than 20 tweets. You may want to keep the entire tweet content, including hashtags/handles. For the remaining users, use **all available information in the dataset that you consider useful to create features** for classification (such as **Location, Description, Place**). You are also encouraged to use any conclusions you draw in Project 2 (clustering) to create any features to improve the classification result. **Use train_test_split() to split data into training and test sets, where 20 percent of the records go to test set.**

## *Remove all retweets first.*

In [5]:
tweets = tweets.drop(tweets[(tweets.Text.str.startswith("RT"))].index)

tweets.head()

Unnamed: 0,UserID,Location,Text
0,1519696717,"Little Rock, Arkansas",@NWAJimmy I've read it now though brother. Was pretty spot on Lots of bright spots but a lot to work on. Exactly as an exhibition should be!
6,106568768,"Dallas, TX",#infosec #Intel #ACM #IEEE Impacts Haswell microarch. Paper proposes mitigations that could prevent BTB-based side https://t.co/DW6vgRAPrv
7,17101060,The Universe,"Hacked e-mails show Clinton campaigns fears about Sanders | https://t.co/WMyCHuCDIc The Philippine Star (PhilippineStar) October 28, 2"
10,16818809,Long Island and beyond...,Hulk smash!
12,163712593,"Portland, Oregon","Well, I'm glad you got a good haircut, America's in the toilet. Is this not a conversation @Haylie_Bre and I woul https://t.co/6dVIIKZcgv"


## *Remove all users that have less than 20 tweets. You may want to keep the entire tweet content, including hashtags/handles.*

In [6]:
# Use group by and count to retrive amounts of tweets for each individual UserID from database.
top_users = tweets.groupby('UserID').Text.count().sort_values(ascending=False)

# Include users with >= 20 tweets.
top_users = top_users[top_users>=20]

top_users.head()

UserID
1971604699    3018
15501305      2493
17197477      2321
1732989216    2210
23719043      2133
Name: Text, dtype: int64

In [7]:
# Refresh dataframe with UserID that have 20 tweets or more.
# Using isin, if the userID is in the 'top_users' list.
tweets =  tweets[tweets['UserID'].isin(top_users.index.tolist())]

tweets.head()

Unnamed: 0,UserID,Location,Text
0,1519696717,"Little Rock, Arkansas",@NWAJimmy I've read it now though brother. Was pretty spot on Lots of bright spots but a lot to work on. Exactly as an exhibition should be!
6,106568768,"Dallas, TX",#infosec #Intel #ACM #IEEE Impacts Haswell microarch. Paper proposes mitigations that could prevent BTB-based side https://t.co/DW6vgRAPrv
7,17101060,The Universe,"Hacked e-mails show Clinton campaigns fears about Sanders | https://t.co/WMyCHuCDIc The Philippine Star (PhilippineStar) October 28, 2"
10,16818809,Long Island and beyond...,Hulk smash!
12,163712593,"Portland, Oregon","Well, I'm glad you got a good haircut, America's in the toilet. Is this not a conversation @Haylie_Bre and I woul https://t.co/6dVIIKZcgv"


In [8]:
new_tweets = tweets.groupby(['UserID',"Location"], as_index=False).sum()

new_tweets.head()

Unnamed: 0,UserID,Location,Text
0,1737,In my head,"Listening to @nehanarkhede talk about event streams at #ATO2016 and thinking about how to continue evolving our Kafka-based pipeline.Related, man would I love to have @erinscafe as a coach for my kids.If youre interested in processing streaming data at scale, @MailChimp is hiring: https://t.co/MYxL4mCkaC Come see us if youre at #ATO2016.@rachaelmaddux Just when I think I couldnt love her more.If you coach or have kids in youth sports, this is well worth reading. https://t.co/BEdjKmZ28H@_raven_io @pwnela Im well past the three month mark and still not sure Im setwhich is cool, but could be distressing if unexpected.Moving on to discussion of IoT, ML, and AR by @GregU. (Lots of buzzwords there.) Just learning of @PTC's acquisition of @Vuforia. #ATO2016Wish @nehanarkhede had asked how many were actively using Kafka. I suspect it would have still been sizable. https://t.co/OVy8zXHeiT@erinscafe @salrelish I'm pretty reserved, but if this happened to a team I coached or had kid onI don't know how I'd handle it. Appalling.@KyFaSt Congrats! Super happy for you and happy to get to work with you as any sort of engineeror just plain person.Starting day at #ATO2016. Looking forward to keynotesparticularly @nehanarkhede and talks through the day. Come see us at @MailChimp booth@SwiftOnSecurity @dcloues Wow. Imagine the world in which cat pictures are the stupidest thing. Can one get transferred into that world?@nehanarkhede @jessfraz Wondered whether you'd mention in your keynote. LOLed when you did.Also related, youth sports is full of coaches like @erinscafe who give so much time and care to kids. The bad ones shouldn't be tolerated.@nehanarkhede Im hoping that wasnt as cringeworthy at I fear it was. Looking forward to your keynote.Just listened to @solrac901 discuss @ApacheSpot. Thinking of possible new applications for ML at @MailChimp. #ATO2016@adickerson @ComfortablySmug I could not be more intrigued than by a topic recommended by both Drudge and @jdickerson.@skamille cats don't care. Just look at the expression.@Michael_Tsunam1 @samnesmith Important clarification. I was concerned for a bit there.@bakins Im so sorry.@JessicaMauerhan If youre still around and interested, I held some swag for you at the booth. Were to the right as you enter building."
1,2391,San Francisco,"@NathanFGao The generous view: Maybe it's just impossible to lead two public companies at the same time well.If Huma Abedin can get out of bed and face the world every day, then nothing should be stopping me.@bonaventuresoft oh no god no. Worst job. I appreciate the sentiment!@presserb @petshopboys the fox in oaklandBottle of Fat Tire I found in the back of my fridge when I got home, I love you.Facebook lets advertisers exclude users by race https://t.co/DDmz3feofO@eldescanso I normally do all my work in google.Uninstall @ Mule Gallery https://t.co/UuNJKfChkv@buzz so good!@andymatic Yeaaah. It's sadly Yahoo!-like. A lot of people are using it, but we haven't deciding what it's for.Going to @petshopboys tonight! I'll be the one in front of the stage who smells like wet dog.@mknepprath Oh thanks. It just exposes the flaw in that sort of rating system.@dansinker Then you are living some Harlan Ellison level science horror.California is ridiculous. It's not like we're out of room, not by a longshot. I bet housing density is good for water conservation too.See also: People who think ""gay"" is an insult. https://t.co/sW73Mw9KGXGOAT Salon with Annalee Newitz! https://t.co/lSTSJiXxtOThanks to internet trolls, that episode of Black Mirror seems so real. (Fortunately, Google reviews aren't a major https://t.co/saJCkbUmrn@willsh well done, sirPet Shop Boys. Still going strong. https://t.co/vGZQUkmaoo@j3sse_pub @petshopboys Go West was the pre encore closer. I got right up to the rail for that one.@thelastwalt fight them!!!The first step to clear communication is a clear goal. It's surprising how many organizations lack sufficiently clear goals.Fun fact: Franz Kafka was a workers' compensation insurance claims officer.Is a sleeping bag a sandwich?Happy place https://t.co/FmLDQVavBJ@tonx the frightful paradox of 2016@MikeIsaac @fmanjoo Start with this! Easy plus tasty plus fast. https://t.co/3YS56Ee50TVice has fewer neonazis. https://t.co/OZsWAkeFnXI tried it when I couldn't sleep. Worked! Neuroscience Says Listening to This Song Reduces Anxiety by Up to 65% https://t.co/kHLOQHVgIN@migurski yes!! Me tooThis is upsetting. https://t.co/OCsq9LFU9e@rtraister Very little of it joyful.@atrubens Maybe @twitter is bad at you.@j3sse_pub @petshopboys here you go! https://t.co/yWFqbGDS0jDAMMIT. That untitled document I just closed without saving were my rescued revisions. I didn't realize my machine crashed. fml@clearwriter I never work in Word anymore. I'm always in the cloud, so I've lost my twitchy save finger.And man, there are gross exes. There are gross exes who pop up in your professional life. And then there's what she is dealing with.Tonight's vocabulary term is ""American Bottom"" https://t.co/UGSq0Wuy7vRaise your hand if you're sadly resigned to @twitter's ultimate demise.What's this? https://t.co/wXViGJFlSh@timbuckwalter thats the part of our operation thats#'s open to the publicAnti-aging lasers come in more than one variety. https://t.co/MbzTwPs5Ms@scarequotes Makes you think, huh?When I'd get anxious, I used to think about how hard Obama's days were and how he got through it. Then I switched to Hillary. Now it's Huma.In the public pool shower, I stepped on something sharp. Pulled a tiny four-leaf clover ear stud out of my foot. Unpacking the symbolism.This video gives new meaning to ""party of Lincoln"" https://t.co/ms69JX2E1n h/t @kathrynyu@summersumz thank you!@steveportigal @billder OK, now that's going on a mug.@NathanFGao I don't think so. But I think there was a hope that jack had special powers. Some return of the king myth.I agree. Twitter users do amazing things using the platform. Product approach is ""Now, respond to hate speech with https://t.co/PmaSqww9aH@robynkanner Ice cream@thelastwalt (Do not fight them)@clearwriter I am so cranky now.@hhavrilesky @TheCut It continues to creep me out when people refer to me as my dog's ""mom"", which is the common parlance in SF.Also, you have to know your audience. A spreadsheet full of responses to a marketing survey is not that.In case there was any doubt bros are currently in charge of the future https://t.co/oiMCuLeeis@tonx right?!?@bjheinley @kowitz Boy do I have answer for you! https://t.co/zQwBhuHVqYThe pleasant is the enemy of the transcendent. https://t.co/GgHfTaHGJB@yodamay This could be my new favorite band.Tell all your friends in Berlin. November 7th, go to my delightful, practical workshop on collaborative research! https://t.co/6ZLGeeFABU@BenKennerly Hmm, I'll put that investigation in my queue.It started in the office across the hall from us. Another era.@michael One of the reviewers gave us 5 stars for the free clinic and sex work.@MaxTemkin just a couple more weeks@lmc @nczeitgeist They should rebrand to ""The Clowns"" in honor of 2016@kathrynyu @dansays Well, now I have a new life list item! Seeing that!Listening to Piketty's *Capital* is relaxing.The most mundane frustrations have inspired great art.I suddenly have 2 xtra GA tix. If you and a friend can be near the Fox in an hour, they can be yours.@tonx sounds rightMaybe we should read these aloud at Mule Nog this year, like Mean Tweets. https://t.co/6ViwRJZLodThere is only one communication strategy. Be specific, interesting, & useful to your audience. Yes, this is hard. No, there is no shortcut.This is the only media I am going to expose myself to for the remainder of 2016. https://t.co/94pMUKSYX5NB: @Annaleen recommends Ghostland: An American History in Haunted Places by Colin Dickey https://t.co/eUB0dwLcQ5In this impassioned plea, @TimGunn is talking fashion, but it applies to all designers who disdain ""regular"" people: https://t.co/MNvCm941oS@atrubens @twitter Prepare your torches, I guess.@mnik bugs bunny style!"
2,2426,SF,"@MikeIsaac Maybe the dog is cashing the checks Mike.@thelancearthur So good.Yeah. It was. https://t.co/CW84E3GB7bWhen your mom opens her mouth. https://t.co/syNVYmvGrqAnd for the love of god, stop trying to make the Apple Watch a thing. Its not a thing. Its Apples third nipple. Move on.This is true only in a world where men and women are held to the same standard. https://t.co/vAU7bqRFOZ@FFWglobal Hey! Been trying to get in touch with you people. Gimme a shout. (It's a good thing.)@SenFeinstein, another one killed by guns in Lincoln today. What will you do about it? https://t.co/6hM1dG0BHF #shootatweet@SenFeinstein, another one killed by guns in Los Angeles today. What will you do about it? https://t.co/V5lJ1hWcCf #shootatweetDesigners have conflated being in the service industry with being servants. We stopped solving problems and became lapdogs to luxury brands.@NancyPelosi, another one killed by guns in Los Angeles today. What will you do about it? https://t.co/NbtGsxMYLq #shootatweetFind your voice. It matters and we need it. New from @abookapart: Demystifying Public Speaking https://t.co/bks79xQJKWYou also arent required to defend how advertising works. https://t.co/yFEnQk76z3@SenatorBoxer, another one killed by guns in Los Angeles today. What will you do about it? https://t.co/NbtGsxMYLq #shootatweetThe best strategy the @GOP has come up with against a VERY qualified woman is the men in her life are idiots. [many women are nodding]@united You are bad at what you do. Amazingly bad.@designhawg @JoeGermuska @knightlab Awesome. We need them to do good work.Facebook allows advertisers to exclude users by race. Exclude. Users. By. Race. https://t.co/aZLejiqmByYou can either design for brands or you can design for people. You cant do both.@MikeIsaac OMG thats rich.Leaving your mark on everything like a spraying tomcat isnt creativity. Its just entitlement writ large.@NancyPelosi, another one killed by guns in Sacramento today. What will you do about it? https://t.co/5NLbMxJmnn #shootatweet@NancyPelosi, another one killed by guns in Los Angeles today. What will you do about it? https://t.co/V5lJ1hWcCf #shootatweetPlease try to remember that Julian Assange is fighting extradition for RAPE charges. Hes not a patriot. Hes a selfish entitled asshole.Hey, thanks for spoiling that new Walking Dead episode, everybody.@Real_TJ_Thomas @gruber Well, yeah. I think we all agree there.Guess what, dudes? You were BORN privileged. As the great Ann Richards once said, you were born on third base, thinking you hit a triple.EVEN IF all the failures Trump accuses Hillary of ARE true (theyre not) theyd still be just a small fraction of his ACTUAL failures.This email bullshit is why LBJ had all his conversations on the shitter.@SenatorBoxer, another one killed by guns in Sacramento today. What will you do about it? https://t.co/5NLbMxJmnn #shootatweet@parisvega @espiekermann No. You cant even compare the two.@MikeHosier Yeah, the luxury watch did great. And NO.@ChappellTracker Were talking about Frozen, right?@drwave @beaucolburn Id throw in #choad too.@halvorson Is that Gilmore Girls?Do not argue with eggs.@NancyPelosi, 2 people were injured by guns in Los Angeles today. What will you do about it? https://t.co/EBk9OEqBp4 #shootatweetI honestly cant take two more weeks of this shit.Is anyone here a brand manager? https://t.co/IFqJc90E5h@designhawg One would think.@SenFeinstein, another one killed by guns in Los Angeles today. What will you do about it? https://t.co/NbtGsxMYLq #shootatweetSo what? Its illegal for one. https://t.co/gszEoiwAItI thought about it. Youre an idiot. https://t.co/iN1WPXqga6Agreed. https://t.co/VIMRejMlzG@SenatorBoxer, 2 people were injured by guns in Los Angeles today. What will you do about it? https://t.co/EBk9OEqBp4 #shootatweetWhats wrong with the Comp Kickstarter? It doesnt pass the Warhol Coke test. We dont need luxury versions of comm https://t.co/S81O3jTO6uYou can buy this rifle RIGHT NOW on @facebook. https://t.co/J7lN7Fde0aand when youre running a gun marketplace, as @facebook is, you can target your gun sales to whites only. https://t.co/ihUwWZBZAZEverything. https://t.co/OT7X69AiraTheres no happier sight in sports than Tony Romo holding a clipboard.@SenatorBoxer, another one killed by guns in Lincoln today. What will you do about it? https://t.co/6hM1dG0BHF #shootatweetWere CITIZENS, asshole. CITIZENS. https://t.co/t6gFZgNoKFIts sooooo true. https://t.co/7PgG6E0q1C@sjarvis Thank YOU.The @GOP promises to go out guns blazing. Because theyre not responsible for enough gun deaths across America. https://t.co/sN3foOCTel@united Please learn how to run an airline.Hi. We are in the middle of a coup. This is how it happens.@NancyPelosi, another one killed by guns in Lincoln today. What will you do about it? https://t.co/6hM1dG0BHF #shootatweet@zeldman Still not a fan. These things say more about designers ineptitude than client issues.@natts @astronautpnguin Ok, Dave. Correct me. Was I wrong? I will apologize if I was.Oh shit! I need a ride to the burn unit! https://t.co/zmBd7C5w31Remember that thing about @facebook allowing advertisers to target by race? Someone used it! https://t.co/QnIPzgYPxL@matt_timmons @austinkleon I AM kidding. I was going to watch it later, but after what Im hearing nope.When you refer to job cuts as a percentage you are dehumanizing people. This is 350 PEOPLE. It matters. https://t.co/2yzzE8DtFj@gruber We should reinstate Pete Rose and ban YOU from baseball. Just the worst fan.And when you fire 350 people and your investors cheer you need to wonder about what kind of people youre in bed with. Fuck Wall St.This election has revealed that men can stumble dick-first into success while women have to walk a tightrope to just have a chance.That Composition book kickstarter reminds me of the time a restaurant tried to sell me a $20 pretzel. Fuck your $20 pretzel. Cheap is best.Hey @facebook! Half these posts LITERALLY have guns for sale in the title. Thats hard to flag how?!? https://t.co/vE01yJXLgODesigners need to focus on fixing problems for those who need help, not creating luxury versions of things that work just fine.@de5igner @madebyfew Yeah, Im not crazy about this. And a little concerned that the conference retweeted it.Trust me, Im capable of being angry about two things. Three if you wanna press it. https://t.co/lPhfz5LQtBLove it. https://t.co/ppT3EELz5Q@gruber Sigh. You did. Pretty great.The last few days have the tinge of a coup on them. Like democracy is being pulled out from under us.Fair enough. Ill go do that. And hey! You go fuck yourself! https://t.co/8POhLNmYlqCheck out #gunsonfacebookrightnow and see how seriously @facebook is taking its gun ban. They could pull these tonight if they wanted to.My friend @romanmars makes good radio. Join me in supporting Radiotopia 2016. https://t.co/Ri5XJjFDbYLooks like I've had my last Yuengling. https://t.co/L40YuaGEeo@SenatorBoxer, another one killed by guns in Los Angeles today. What will you do about it? https://t.co/V5lJ1hWcCf #shootatweetYou can buy this RIGHT NOW on @facebook. https://t.co/KO7k3vFBO9LOL. I havent even made the joke about how much designers love making things with lay-flat spines. Way to project.Says the man who felt the need to jump on Twitter to continue berating a woman. https://t.co/bxTn2fwstF@mat Well, actually Mat. In fairness, were not racist, but all lives matter.Hey fools! Go see this movie! my friend @sonia__harris designed a bunch of stuff for it. She's all talented and shi https://t.co/B5YcOJN94TDesigners, I only pick on you because I love you and I know youre capable of so much more. But somewhere along the line you forgot.and right now, people need you like they havent needed you in a long time.Help me with a thing. Go to @facebook. Find a gun for sale. Post a screenshot here with #gunsonfacebookrightnow.White men realize they dont HAVE to come to the defense of tech corporations, right?@SenFeinstein, 2 people were injured by guns in Los Angeles today. What will you do about it? https://t.co/EBk9OEqBp4 #shootatweet@SherylCababa @austinkleon Yes, but when the disenfranchised pull out sharpies at a restaurant and mark the plates they get arrested.@dansinker No. meet me in Berlin next week. Well start a whole government in exile thing.@SenFeinstein, another one killed by guns in Sacramento today. What will you do about it? https://t.co/5NLbMxJmnn #shootatweetMaybe stop building things for Wall St and go back to making things people need.An update on taking down @facebooks guns from @jbsibley: https://t.co/aBQiTYDxC7I block gun nuts, racists, alt-right pricks, and mens group turds. Subscribe to my @blocktogether and benefit : https://t.co/sRLiJetnnd@gruber Only YOU would root for Cleveland over Americas sweethearts.@ToddRoss Yeah, it was visceral enough in the comic. And I hear its worse on film. I don't really feel a need to experience that.All those times you were interrupted? Told to smile more? Passed over for a job? In 10 days you can elect Americas first woman president.Some of you may not realize that the 2016 Mansplaining Olympics started this morning. https://t.co/IJWyUclVQlBlatant lie. They harass and suspend us for reporting illegal gun sales. https://t.co/KVR5dSvR88Cubs be cubbin.Thanks to everyone at #MxF2016 for being an amazing audience today."
3,11569,Jet Life Stadium,"False start... good God.Jets could activate Devin Smith, Breno Giacomini from PUP list vs. Browns, says Todd Bowles #jets https://t.co/BRwGm4MueQI'm about to call an audible... ""KILL KILL"" #Jets https://t.co/na0J8xRzkaGoal to go. Browns just flew down the field. #JetsBilal Powell's Huge 35-Yard TD Run! | #Jets https://t.co/lJoDPzLQdvFireman Ed sighting #Jets https://t.co/Ah39mI6rmMWell that's a great start.@pyetti My wife put me in the padded room.3 and out! GRRRRRRRR@KirkCassels we brought it up on this showLook how empty that stadium is! A combined 2 wins will do that...Still waiting for @TheSheikh to post photos... :(They are taking Fitz out to check him. Here's Petty.Revis getting fried again.@KirkCassels thx manOMG he was killed #JetsGame day!Just launched new podcast: ""244: Pissed-Patrick"" at https://t.co/sjrztrkFFT #Jets #PissedPatrickNice catch Peake.Mangold in walking boot, unlikely to play Sunday https://t.co/TtKjfU7H9G #jetsI think he has no skin on his nose.@BrandonCappelli tell him I said heyThese receivers are doing whatever they want. TACKLE! #JetsPetty makes his NFL debut. #JetsWe made them look like the Patriots. https://t.co/YbGv4g2bwWRaking the leaves is looking good soon. ugh@120Terp very visual#J_E_T_S_JETS_JETS_JETS!! LET'S GO!!!!! #JETSPOWELL! BOOM! TDWell the #Jets looked HORRENDOUS!OMG Come one!!! Working on Pryor.If the #Jets go 3 and out... I'll lose it.Another 3 and out. #jetsSee... I've been saying trade him and get some picks. https://t.co/SXCJr6M6yaWashington Coach Pulls Out Dong, Pees Freely Before The Masses During Game - Former Jets Coach. https://t.co/PFkARF6WGZNo Huddle #JetsAny Jets fan ever feel like we are on the WORST roller coaster ride ever imagined?I hate it when Mangold doesn't play...And that's a WGottlieb: Will Darrelle Revis retire after this year? https://t.co/QE3DMbtuy0Garbage TDNY Jets Injury Update: Nick Mangold and Muhammad Wilkerson https://t.co/t4aUpYT7BL5 straight plays of nothing to start the game. #JetsPowell has always been explosive.Twitter: 70 Percent Of NFL Live Stream Audience Under 35 https://t.co/QZ0ykxB7igBecause of the challenge of the spot, Fitz can slip back in with not missing a play. Better luck next time Petty. No NFL debut. #JetsStandford is running around everywhere..REVIS! YOU ARE BROKEN!Who's ready for two consecutive wins?! #JetsRyan Fitzpatrick back under center for Jets, ready to ""show them"" https://t.co/jnFT7ipTcSI'm about to throw something.Guess I'm watching the game... https://t.co/OgpW2myHAkMy thoughts on the #Jets QB situation ---->https://t.co/sjrztrCgxrFitz goes right back in... No Petty.Kill Kill@DaveKrayhem I don't... sorry... I'm from PAChipping away. Gain of 4. #Jets"
4,14763,"Portland, Oregon, USA","@mwhuss As an introvert, I actually like the alone time. Also, cuts down on commute time :)@zuhrisaifudin45 Thank you!@garrettdimon Great hustle man!Working on Project One for the @treehouse Full Stack JavaScript Techdegree Code: https://t.co/o48Dh8Irmy https://t.co/378NfCcsShBig props to Wayne for bringing everyone together to talk about improving diversity and https://t.co/vnnxryFLP3@bencareynyt Hi Benedict - I'd like to invite you to be our featured guest on the Educate Yourself podcast :) https://t.co/50qhAxrjBi@kristina_basham @treehouse is an online affordable technology school with 75,000 students. We take people from zero to job ready :)This carpet of Ginkgo Tree leaves is just amazing #fall #nofilter #portland #beauty https://t.co/Dg2UhtFA4DAshley did it. Your turn. https://t.co/jiWad3u9Pr@wjgilmore GoruckWe have more amazing guests lined up for the Educate Yourself podcast! :) Android: https://t.co/CAjl9a4h5v iTunes: https://t.co/50qhAxrjBiWhat I carry around in my backpack all the time #goruck #fitness #spartan #training https://t.co/lwokqKcTZW@KatieMSmith I'd have to check - I'm not sure actually!@zapatoche Never because I use a @GORUCK. Lifetime guaranteedI think it's time to buy some Microsoft stock. Apple might be on the long term decline.Using the All Gender Restroom today was strange at first but then it became totally normal. It https://t.co/j2Hd9OKv1W.@superbooked looks really exciting. Great work @danielmallPlease subscribe and rate our new podcast: #EducateYourself Thanks! iTunes: https://t.co/50qhAxrjBi Play Store: https://t.co/CAjl9a4h5v@nikiforovalex18 Congrats!!! It's so awesome what you've achieved.Monty, our Great Dane, finally got ahold of my slippers. Doesn't stop me from wearing them to https://t.co/qQZfHhh2ndHere's to all you Tesla-Elon-doubters. Bam :) https://t.co/bSJ1B9JDym@danielmall @AaronGustafson @davatron5000 Excited to hear how this goes. My excitement and loyalty to Apple is waning.See you in one hour! :) https://t.co/VxvlVXepM1@hihellosm Wireless Beats. Love emI got the opportunity to speak to an awesome group of high school kids about jobs, technology https://t.co/Jmk2t57kto@codejake Just changed that to ""use"". Good call :)An amazing piece of art depicting Martin Luther King Jr, made of dominos https://t.co/DwTSlVlB3Y@wjgilmore Ah I think it's the GR1Gettin' my learn on. Working on Project One in the Treehouse Full Stack JavaScript Techdegree https://t.co/mJ4TWJNdOoI shared one of my best productivity tips here: ""How to use your calendar to maximize your effectiveness https://t.co/4bca9h5Kh2How to hack your calendar to maximize your effectiveness https://t.co/hNVcwghHpO #effectiveness #productivity #timemanagementBlast from the past. 15-year old me, getting my Eagle Scout award in Boy Scouts. Couldn't have https://t.co/fF6dpmMkcZ"


In [9]:
# For ground truth. 
tweet_cols2 = ['UserID', 'Supporter']
ground_truth = pd.read_table('clinton_trump_user_classes.txt', names=tweet_cols2, encoding ="ISO-8859-1")

In [10]:
# Show 'Supporter' column by merge.
merge_tweets = pd.merge(new_tweets, ground_truth, on='UserID')

merge_tweets.head()

Unnamed: 0,UserID,Location,Text,Supporter
0,1737,In my head,"Listening to @nehanarkhede talk about event streams at #ATO2016 and thinking about how to continue evolving our Kafka-based pipeline.Related, man would I love to have @erinscafe as a coach for my kids.If youre interested in processing streaming data at scale, @MailChimp is hiring: https://t.co/MYxL4mCkaC Come see us if youre at #ATO2016.@rachaelmaddux Just when I think I couldnt love her more.If you coach or have kids in youth sports, this is well worth reading. https://t.co/BEdjKmZ28H@_raven_io @pwnela Im well past the three month mark and still not sure Im setwhich is cool, but could be distressing if unexpected.Moving on to discussion of IoT, ML, and AR by @GregU. (Lots of buzzwords there.) Just learning of @PTC's acquisition of @Vuforia. #ATO2016Wish @nehanarkhede had asked how many were actively using Kafka. I suspect it would have still been sizable. https://t.co/OVy8zXHeiT@erinscafe @salrelish I'm pretty reserved, but if this happened to a team I coached or had kid onI don't know how I'd handle it. Appalling.@KyFaSt Congrats! Super happy for you and happy to get to work with you as any sort of engineeror just plain person.Starting day at #ATO2016. Looking forward to keynotesparticularly @nehanarkhede and talks through the day. Come see us at @MailChimp booth@SwiftOnSecurity @dcloues Wow. Imagine the world in which cat pictures are the stupidest thing. Can one get transferred into that world?@nehanarkhede @jessfraz Wondered whether you'd mention in your keynote. LOLed when you did.Also related, youth sports is full of coaches like @erinscafe who give so much time and care to kids. The bad ones shouldn't be tolerated.@nehanarkhede Im hoping that wasnt as cringeworthy at I fear it was. Looking forward to your keynote.Just listened to @solrac901 discuss @ApacheSpot. Thinking of possible new applications for ML at @MailChimp. #ATO2016@adickerson @ComfortablySmug I could not be more intrigued than by a topic recommended by both Drudge and @jdickerson.@skamille cats don't care. Just look at the expression.@Michael_Tsunam1 @samnesmith Important clarification. I was concerned for a bit there.@bakins Im so sorry.@JessicaMauerhan If youre still around and interested, I held some swag for you at the booth. Were to the right as you enter building.",1
1,2391,San Francisco,"@NathanFGao The generous view: Maybe it's just impossible to lead two public companies at the same time well.If Huma Abedin can get out of bed and face the world every day, then nothing should be stopping me.@bonaventuresoft oh no god no. Worst job. I appreciate the sentiment!@presserb @petshopboys the fox in oaklandBottle of Fat Tire I found in the back of my fridge when I got home, I love you.Facebook lets advertisers exclude users by race https://t.co/DDmz3feofO@eldescanso I normally do all my work in google.Uninstall @ Mule Gallery https://t.co/UuNJKfChkv@buzz so good!@andymatic Yeaaah. It's sadly Yahoo!-like. A lot of people are using it, but we haven't deciding what it's for.Going to @petshopboys tonight! I'll be the one in front of the stage who smells like wet dog.@mknepprath Oh thanks. It just exposes the flaw in that sort of rating system.@dansinker Then you are living some Harlan Ellison level science horror.California is ridiculous. It's not like we're out of room, not by a longshot. I bet housing density is good for water conservation too.See also: People who think ""gay"" is an insult. https://t.co/sW73Mw9KGXGOAT Salon with Annalee Newitz! https://t.co/lSTSJiXxtOThanks to internet trolls, that episode of Black Mirror seems so real. (Fortunately, Google reviews aren't a major https://t.co/saJCkbUmrn@willsh well done, sirPet Shop Boys. Still going strong. https://t.co/vGZQUkmaoo@j3sse_pub @petshopboys Go West was the pre encore closer. I got right up to the rail for that one.@thelastwalt fight them!!!The first step to clear communication is a clear goal. It's surprising how many organizations lack sufficiently clear goals.Fun fact: Franz Kafka was a workers' compensation insurance claims officer.Is a sleeping bag a sandwich?Happy place https://t.co/FmLDQVavBJ@tonx the frightful paradox of 2016@MikeIsaac @fmanjoo Start with this! Easy plus tasty plus fast. https://t.co/3YS56Ee50TVice has fewer neonazis. https://t.co/OZsWAkeFnXI tried it when I couldn't sleep. Worked! Neuroscience Says Listening to This Song Reduces Anxiety by Up to 65% https://t.co/kHLOQHVgIN@migurski yes!! Me tooThis is upsetting. https://t.co/OCsq9LFU9e@rtraister Very little of it joyful.@atrubens Maybe @twitter is bad at you.@j3sse_pub @petshopboys here you go! https://t.co/yWFqbGDS0jDAMMIT. That untitled document I just closed without saving were my rescued revisions. I didn't realize my machine crashed. fml@clearwriter I never work in Word anymore. I'm always in the cloud, so I've lost my twitchy save finger.And man, there are gross exes. There are gross exes who pop up in your professional life. And then there's what she is dealing with.Tonight's vocabulary term is ""American Bottom"" https://t.co/UGSq0Wuy7vRaise your hand if you're sadly resigned to @twitter's ultimate demise.What's this? https://t.co/wXViGJFlSh@timbuckwalter thats the part of our operation thats#'s open to the publicAnti-aging lasers come in more than one variety. https://t.co/MbzTwPs5Ms@scarequotes Makes you think, huh?When I'd get anxious, I used to think about how hard Obama's days were and how he got through it. Then I switched to Hillary. Now it's Huma.In the public pool shower, I stepped on something sharp. Pulled a tiny four-leaf clover ear stud out of my foot. Unpacking the symbolism.This video gives new meaning to ""party of Lincoln"" https://t.co/ms69JX2E1n h/t @kathrynyu@summersumz thank you!@steveportigal @billder OK, now that's going on a mug.@NathanFGao I don't think so. But I think there was a hope that jack had special powers. Some return of the king myth.I agree. Twitter users do amazing things using the platform. Product approach is ""Now, respond to hate speech with https://t.co/PmaSqww9aH@robynkanner Ice cream@thelastwalt (Do not fight them)@clearwriter I am so cranky now.@hhavrilesky @TheCut It continues to creep me out when people refer to me as my dog's ""mom"", which is the common parlance in SF.Also, you have to know your audience. A spreadsheet full of responses to a marketing survey is not that.In case there was any doubt bros are currently in charge of the future https://t.co/oiMCuLeeis@tonx right?!?@bjheinley @kowitz Boy do I have answer for you! https://t.co/zQwBhuHVqYThe pleasant is the enemy of the transcendent. https://t.co/GgHfTaHGJB@yodamay This could be my new favorite band.Tell all your friends in Berlin. November 7th, go to my delightful, practical workshop on collaborative research! https://t.co/6ZLGeeFABU@BenKennerly Hmm, I'll put that investigation in my queue.It started in the office across the hall from us. Another era.@michael One of the reviewers gave us 5 stars for the free clinic and sex work.@MaxTemkin just a couple more weeks@lmc @nczeitgeist They should rebrand to ""The Clowns"" in honor of 2016@kathrynyu @dansays Well, now I have a new life list item! Seeing that!Listening to Piketty's *Capital* is relaxing.The most mundane frustrations have inspired great art.I suddenly have 2 xtra GA tix. If you and a friend can be near the Fox in an hour, they can be yours.@tonx sounds rightMaybe we should read these aloud at Mule Nog this year, like Mean Tweets. https://t.co/6ViwRJZLodThere is only one communication strategy. Be specific, interesting, & useful to your audience. Yes, this is hard. No, there is no shortcut.This is the only media I am going to expose myself to for the remainder of 2016. https://t.co/94pMUKSYX5NB: @Annaleen recommends Ghostland: An American History in Haunted Places by Colin Dickey https://t.co/eUB0dwLcQ5In this impassioned plea, @TimGunn is talking fashion, but it applies to all designers who disdain ""regular"" people: https://t.co/MNvCm941oS@atrubens @twitter Prepare your torches, I guess.@mnik bugs bunny style!",1
2,2426,SF,"@MikeIsaac Maybe the dog is cashing the checks Mike.@thelancearthur So good.Yeah. It was. https://t.co/CW84E3GB7bWhen your mom opens her mouth. https://t.co/syNVYmvGrqAnd for the love of god, stop trying to make the Apple Watch a thing. Its not a thing. Its Apples third nipple. Move on.This is true only in a world where men and women are held to the same standard. https://t.co/vAU7bqRFOZ@FFWglobal Hey! Been trying to get in touch with you people. Gimme a shout. (It's a good thing.)@SenFeinstein, another one killed by guns in Lincoln today. What will you do about it? https://t.co/6hM1dG0BHF #shootatweet@SenFeinstein, another one killed by guns in Los Angeles today. What will you do about it? https://t.co/V5lJ1hWcCf #shootatweetDesigners have conflated being in the service industry with being servants. We stopped solving problems and became lapdogs to luxury brands.@NancyPelosi, another one killed by guns in Los Angeles today. What will you do about it? https://t.co/NbtGsxMYLq #shootatweetFind your voice. It matters and we need it. New from @abookapart: Demystifying Public Speaking https://t.co/bks79xQJKWYou also arent required to defend how advertising works. https://t.co/yFEnQk76z3@SenatorBoxer, another one killed by guns in Los Angeles today. What will you do about it? https://t.co/NbtGsxMYLq #shootatweetThe best strategy the @GOP has come up with against a VERY qualified woman is the men in her life are idiots. [many women are nodding]@united You are bad at what you do. Amazingly bad.@designhawg @JoeGermuska @knightlab Awesome. We need them to do good work.Facebook allows advertisers to exclude users by race. Exclude. Users. By. Race. https://t.co/aZLejiqmByYou can either design for brands or you can design for people. You cant do both.@MikeIsaac OMG thats rich.Leaving your mark on everything like a spraying tomcat isnt creativity. Its just entitlement writ large.@NancyPelosi, another one killed by guns in Sacramento today. What will you do about it? https://t.co/5NLbMxJmnn #shootatweet@NancyPelosi, another one killed by guns in Los Angeles today. What will you do about it? https://t.co/V5lJ1hWcCf #shootatweetPlease try to remember that Julian Assange is fighting extradition for RAPE charges. Hes not a patriot. Hes a selfish entitled asshole.Hey, thanks for spoiling that new Walking Dead episode, everybody.@Real_TJ_Thomas @gruber Well, yeah. I think we all agree there.Guess what, dudes? You were BORN privileged. As the great Ann Richards once said, you were born on third base, thinking you hit a triple.EVEN IF all the failures Trump accuses Hillary of ARE true (theyre not) theyd still be just a small fraction of his ACTUAL failures.This email bullshit is why LBJ had all his conversations on the shitter.@SenatorBoxer, another one killed by guns in Sacramento today. What will you do about it? https://t.co/5NLbMxJmnn #shootatweet@parisvega @espiekermann No. You cant even compare the two.@MikeHosier Yeah, the luxury watch did great. And NO.@ChappellTracker Were talking about Frozen, right?@drwave @beaucolburn Id throw in #choad too.@halvorson Is that Gilmore Girls?Do not argue with eggs.@NancyPelosi, 2 people were injured by guns in Los Angeles today. What will you do about it? https://t.co/EBk9OEqBp4 #shootatweetI honestly cant take two more weeks of this shit.Is anyone here a brand manager? https://t.co/IFqJc90E5h@designhawg One would think.@SenFeinstein, another one killed by guns in Los Angeles today. What will you do about it? https://t.co/NbtGsxMYLq #shootatweetSo what? Its illegal for one. https://t.co/gszEoiwAItI thought about it. Youre an idiot. https://t.co/iN1WPXqga6Agreed. https://t.co/VIMRejMlzG@SenatorBoxer, 2 people were injured by guns in Los Angeles today. What will you do about it? https://t.co/EBk9OEqBp4 #shootatweetWhats wrong with the Comp Kickstarter? It doesnt pass the Warhol Coke test. We dont need luxury versions of comm https://t.co/S81O3jTO6uYou can buy this rifle RIGHT NOW on @facebook. https://t.co/J7lN7Fde0aand when youre running a gun marketplace, as @facebook is, you can target your gun sales to whites only. https://t.co/ihUwWZBZAZEverything. https://t.co/OT7X69AiraTheres no happier sight in sports than Tony Romo holding a clipboard.@SenatorBoxer, another one killed by guns in Lincoln today. What will you do about it? https://t.co/6hM1dG0BHF #shootatweetWere CITIZENS, asshole. CITIZENS. https://t.co/t6gFZgNoKFIts sooooo true. https://t.co/7PgG6E0q1C@sjarvis Thank YOU.The @GOP promises to go out guns blazing. Because theyre not responsible for enough gun deaths across America. https://t.co/sN3foOCTel@united Please learn how to run an airline.Hi. We are in the middle of a coup. This is how it happens.@NancyPelosi, another one killed by guns in Lincoln today. What will you do about it? https://t.co/6hM1dG0BHF #shootatweet@zeldman Still not a fan. These things say more about designers ineptitude than client issues.@natts @astronautpnguin Ok, Dave. Correct me. Was I wrong? I will apologize if I was.Oh shit! I need a ride to the burn unit! https://t.co/zmBd7C5w31Remember that thing about @facebook allowing advertisers to target by race? Someone used it! https://t.co/QnIPzgYPxL@matt_timmons @austinkleon I AM kidding. I was going to watch it later, but after what Im hearing nope.When you refer to job cuts as a percentage you are dehumanizing people. This is 350 PEOPLE. It matters. https://t.co/2yzzE8DtFj@gruber We should reinstate Pete Rose and ban YOU from baseball. Just the worst fan.And when you fire 350 people and your investors cheer you need to wonder about what kind of people youre in bed with. Fuck Wall St.This election has revealed that men can stumble dick-first into success while women have to walk a tightrope to just have a chance.That Composition book kickstarter reminds me of the time a restaurant tried to sell me a $20 pretzel. Fuck your $20 pretzel. Cheap is best.Hey @facebook! Half these posts LITERALLY have guns for sale in the title. Thats hard to flag how?!? https://t.co/vE01yJXLgODesigners need to focus on fixing problems for those who need help, not creating luxury versions of things that work just fine.@de5igner @madebyfew Yeah, Im not crazy about this. And a little concerned that the conference retweeted it.Trust me, Im capable of being angry about two things. Three if you wanna press it. https://t.co/lPhfz5LQtBLove it. https://t.co/ppT3EELz5Q@gruber Sigh. You did. Pretty great.The last few days have the tinge of a coup on them. Like democracy is being pulled out from under us.Fair enough. Ill go do that. And hey! You go fuck yourself! https://t.co/8POhLNmYlqCheck out #gunsonfacebookrightnow and see how seriously @facebook is taking its gun ban. They could pull these tonight if they wanted to.My friend @romanmars makes good radio. Join me in supporting Radiotopia 2016. https://t.co/Ri5XJjFDbYLooks like I've had my last Yuengling. https://t.co/L40YuaGEeo@SenatorBoxer, another one killed by guns in Los Angeles today. What will you do about it? https://t.co/V5lJ1hWcCf #shootatweetYou can buy this RIGHT NOW on @facebook. https://t.co/KO7k3vFBO9LOL. I havent even made the joke about how much designers love making things with lay-flat spines. Way to project.Says the man who felt the need to jump on Twitter to continue berating a woman. https://t.co/bxTn2fwstF@mat Well, actually Mat. In fairness, were not racist, but all lives matter.Hey fools! Go see this movie! my friend @sonia__harris designed a bunch of stuff for it. She's all talented and shi https://t.co/B5YcOJN94TDesigners, I only pick on you because I love you and I know youre capable of so much more. But somewhere along the line you forgot.and right now, people need you like they havent needed you in a long time.Help me with a thing. Go to @facebook. Find a gun for sale. Post a screenshot here with #gunsonfacebookrightnow.White men realize they dont HAVE to come to the defense of tech corporations, right?@SenFeinstein, 2 people were injured by guns in Los Angeles today. What will you do about it? https://t.co/EBk9OEqBp4 #shootatweet@SherylCababa @austinkleon Yes, but when the disenfranchised pull out sharpies at a restaurant and mark the plates they get arrested.@dansinker No. meet me in Berlin next week. Well start a whole government in exile thing.@SenFeinstein, another one killed by guns in Sacramento today. What will you do about it? https://t.co/5NLbMxJmnn #shootatweetMaybe stop building things for Wall St and go back to making things people need.An update on taking down @facebooks guns from @jbsibley: https://t.co/aBQiTYDxC7I block gun nuts, racists, alt-right pricks, and mens group turds. Subscribe to my @blocktogether and benefit : https://t.co/sRLiJetnnd@gruber Only YOU would root for Cleveland over Americas sweethearts.@ToddRoss Yeah, it was visceral enough in the comic. And I hear its worse on film. I don't really feel a need to experience that.All those times you were interrupted? Told to smile more? Passed over for a job? In 10 days you can elect Americas first woman president.Some of you may not realize that the 2016 Mansplaining Olympics started this morning. https://t.co/IJWyUclVQlBlatant lie. They harass and suspend us for reporting illegal gun sales. https://t.co/KVR5dSvR88Cubs be cubbin.Thanks to everyone at #MxF2016 for being an amazing audience today.",1
3,11569,Jet Life Stadium,"False start... good God.Jets could activate Devin Smith, Breno Giacomini from PUP list vs. Browns, says Todd Bowles #jets https://t.co/BRwGm4MueQI'm about to call an audible... ""KILL KILL"" #Jets https://t.co/na0J8xRzkaGoal to go. Browns just flew down the field. #JetsBilal Powell's Huge 35-Yard TD Run! | #Jets https://t.co/lJoDPzLQdvFireman Ed sighting #Jets https://t.co/Ah39mI6rmMWell that's a great start.@pyetti My wife put me in the padded room.3 and out! GRRRRRRRR@KirkCassels we brought it up on this showLook how empty that stadium is! A combined 2 wins will do that...Still waiting for @TheSheikh to post photos... :(They are taking Fitz out to check him. Here's Petty.Revis getting fried again.@KirkCassels thx manOMG he was killed #JetsGame day!Just launched new podcast: ""244: Pissed-Patrick"" at https://t.co/sjrztrkFFT #Jets #PissedPatrickNice catch Peake.Mangold in walking boot, unlikely to play Sunday https://t.co/TtKjfU7H9G #jetsI think he has no skin on his nose.@BrandonCappelli tell him I said heyThese receivers are doing whatever they want. TACKLE! #JetsPetty makes his NFL debut. #JetsWe made them look like the Patriots. https://t.co/YbGv4g2bwWRaking the leaves is looking good soon. ugh@120Terp very visual#J_E_T_S_JETS_JETS_JETS!! LET'S GO!!!!! #JETSPOWELL! BOOM! TDWell the #Jets looked HORRENDOUS!OMG Come one!!! Working on Pryor.If the #Jets go 3 and out... I'll lose it.Another 3 and out. #jetsSee... I've been saying trade him and get some picks. https://t.co/SXCJr6M6yaWashington Coach Pulls Out Dong, Pees Freely Before The Masses During Game - Former Jets Coach. https://t.co/PFkARF6WGZNo Huddle #JetsAny Jets fan ever feel like we are on the WORST roller coaster ride ever imagined?I hate it when Mangold doesn't play...And that's a WGottlieb: Will Darrelle Revis retire after this year? https://t.co/QE3DMbtuy0Garbage TDNY Jets Injury Update: Nick Mangold and Muhammad Wilkerson https://t.co/t4aUpYT7BL5 straight plays of nothing to start the game. #JetsPowell has always been explosive.Twitter: 70 Percent Of NFL Live Stream Audience Under 35 https://t.co/QZ0ykxB7igBecause of the challenge of the spot, Fitz can slip back in with not missing a play. Better luck next time Petty. No NFL debut. #JetsStandford is running around everywhere..REVIS! YOU ARE BROKEN!Who's ready for two consecutive wins?! #JetsRyan Fitzpatrick back under center for Jets, ready to ""show them"" https://t.co/jnFT7ipTcSI'm about to throw something.Guess I'm watching the game... https://t.co/OgpW2myHAkMy thoughts on the #Jets QB situation ---->https://t.co/sjrztrCgxrFitz goes right back in... No Petty.Kill Kill@DaveKrayhem I don't... sorry... I'm from PAChipping away. Gain of 4. #Jets",0
4,14763,"Portland, Oregon, USA","@mwhuss As an introvert, I actually like the alone time. Also, cuts down on commute time :)@zuhrisaifudin45 Thank you!@garrettdimon Great hustle man!Working on Project One for the @treehouse Full Stack JavaScript Techdegree Code: https://t.co/o48Dh8Irmy https://t.co/378NfCcsShBig props to Wayne for bringing everyone together to talk about improving diversity and https://t.co/vnnxryFLP3@bencareynyt Hi Benedict - I'd like to invite you to be our featured guest on the Educate Yourself podcast :) https://t.co/50qhAxrjBi@kristina_basham @treehouse is an online affordable technology school with 75,000 students. We take people from zero to job ready :)This carpet of Ginkgo Tree leaves is just amazing #fall #nofilter #portland #beauty https://t.co/Dg2UhtFA4DAshley did it. Your turn. https://t.co/jiWad3u9Pr@wjgilmore GoruckWe have more amazing guests lined up for the Educate Yourself podcast! :) Android: https://t.co/CAjl9a4h5v iTunes: https://t.co/50qhAxrjBiWhat I carry around in my backpack all the time #goruck #fitness #spartan #training https://t.co/lwokqKcTZW@KatieMSmith I'd have to check - I'm not sure actually!@zapatoche Never because I use a @GORUCK. Lifetime guaranteedI think it's time to buy some Microsoft stock. Apple might be on the long term decline.Using the All Gender Restroom today was strange at first but then it became totally normal. It https://t.co/j2Hd9OKv1W.@superbooked looks really exciting. Great work @danielmallPlease subscribe and rate our new podcast: #EducateYourself Thanks! iTunes: https://t.co/50qhAxrjBi Play Store: https://t.co/CAjl9a4h5v@nikiforovalex18 Congrats!!! It's so awesome what you've achieved.Monty, our Great Dane, finally got ahold of my slippers. Doesn't stop me from wearing them to https://t.co/qQZfHhh2ndHere's to all you Tesla-Elon-doubters. Bam :) https://t.co/bSJ1B9JDym@danielmall @AaronGustafson @davatron5000 Excited to hear how this goes. My excitement and loyalty to Apple is waning.See you in one hour! :) https://t.co/VxvlVXepM1@hihellosm Wireless Beats. Love emI got the opportunity to speak to an awesome group of high school kids about jobs, technology https://t.co/Jmk2t57kto@codejake Just changed that to ""use"". Good call :)An amazing piece of art depicting Martin Luther King Jr, made of dominos https://t.co/DwTSlVlB3Y@wjgilmore Ah I think it's the GR1Gettin' my learn on. Working on Project One in the Treehouse Full Stack JavaScript Techdegree https://t.co/mJ4TWJNdOoI shared one of my best productivity tips here: ""How to use your calendar to maximize your effectiveness https://t.co/4bca9h5Kh2How to hack your calendar to maximize your effectiveness https://t.co/hNVcwghHpO #effectiveness #productivity #timemanagementBlast from the past. 15-year old me, getting my Eagle Scout award in Boy Scouts. Couldn't have https://t.co/fF6dpmMkcZ",1


In [11]:
# Dataframe hashtag aggregate. 

# To obtain vector representation of each user, you may group all the 
# hashtags/handles by each user and then create a dataframe where each 
# line has a user and all her hashtags/handles. Then 
# call tfidfVectorzier() on that dataframe.

vectorizer = sk_text.TfidfVectorizer(
    stop_words='english',
    max_features=1000, # 2000 takes a while...
                                     
    # .1 Means "ignore terms that appear in less than 1% of the documents".
                                     
    # The default min_df is 1, which means "ignore terms that appear in less than 1 document". 
    # Thus, the default setting does not ignore any terms.        
    min_df=500, 
                             
    # If .5: ignore terms that appear in more than 50% of the documents.
    #max_df=.9, 
)

matrix = vectorizer.fit_transform(merge_tweets.Text)
print(type(matrix))         
tfidf_data = matrix.toarray() 
print(tfidf_data)

<class 'scipy.sparse.csr.csr_matrix'>
[[0.         0.         0.         ... 0.         0.         0.        ]
 [0.         0.         0.         ... 0.         0.         0.        ]
 [0.         0.         0.02194481 ... 0.         0.         0.        ]
 ...
 [0.02100724 0.01587684 0.00577047 ... 0.         0.01444637 0.        ]
 [0.         0.         0.         ... 0.         0.         0.        ]
 [0.         0.         0.         ... 0.         0.         0.        ]]


In [12]:
# Vectorize Location.

vectorizer2 = sk_text.TfidfVectorizer(
    stop_words='english',
    max_features=1000, # Try 2000?
                                     
    # .1 Means "ignore terms that appear in less than 1% of the documents".
                                     
    # The default min_df is 1, which means "ignore terms that appear in less than 1 document". 
    # Thus, the default setting does not ignore any terms.        
    min_df=500, 
                             
    # If .5: ignore terms that appear in more than 50% of the documents.
    #max_df=.9, 
)

matrix2 = vectorizer2.fit_transform(merge_tweets.Location)
print(type(matrix2))         
tfidf_data2 = matrix2.toarray() 
print(tfidf_data2)

<class 'scipy.sparse.csr.csr_matrix'>
[[0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 ...
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]]


In [13]:
count_vect_df = pd.DataFrame(matrix.todense(), columns=vectorizer.get_feature_names())

# Location count vectorizer.
count_vect_df2 = pd.DataFrame(matrix2.todense(), columns=vectorizer2.get_feature_names())

merge_tweets = pd.concat([merge_tweets, count_vect_df], axis=1)
merge_tweets = pd.concat([merge_tweets, count_vect_df2], axis=1)

## *Use train_test_split() to split data into training and test sets, where 20 percent of the records go to test set.*

In [14]:
# Dataframe with hashtags & handles & ground truth.
X = merge_tweets.drop(columns=['UserID','Text','Location','Supporter'])
# Trump or Clinton 'Supporter' values.
y = merge_tweets['Supporter'].values

In [15]:
# Shapes show.
print(X.shape)
print(y.shape)

# 19515 average in total and split 19515 records to 2 parts.

(19515, 1005)
(19515,)


In [16]:
# Split dataset into train and test data.
# Predict. Test the size. How many records you want to go to test data. 20% of the records will go to test data (.2)
# and remaining to to other set. Specify the first 3 parameter correctly.


# random_state: is used for initializing the internal random number generator, which will 
# decide the splitting of data into train and test indices.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=4, stratify=y)

# Task 1.2
- **Train Decision Tree, SVM, Logistic Regression, and Neural Networks.** In your report describe the features that you used for each classifier.

## *Decision Tree*

In [17]:
dtree = tree.DecisionTreeClassifier()
dtree = dtree.fit(X_train, y_train)

# Print out the model.
dtree.fit(X_train, y_train)

# These make predictions on the testing set... 
# Apply model on X_test.
#y_pred = dtree.predict(X_test)

#print("Confusion Matrix:")
#print(metrics.confusion_matrix(y_test, y_pred))
#print('Precision Score:', metrics.precision_score(y_test, y_pred, average= 'weighted'))
#print('Recall Score:', metrics.recall_score(y_test, y_pred, average= 'weighted'))
#print('F1 Score:', metrics.f1_score(y_test, y_pred, average= 'weighted'))            

DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,
            max_features=None, max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, presort=False, random_state=None,
            splitter='best')

## *SVM*

In [18]:
# Probability: boolean, optional (default=False)  
# Whether to enable probability estimates.

# 2 most important parameters:
# C: float, optional (default=1.0)
# Setting gamma used in kernel to ‘auto’ will use 1 / n features

# Use support vector machine. C and gamma. 
# C: how much penalty to give to misclassfied records.

scaling = MinMaxScaler(feature_range=(-1,1)).fit(X_train)
X_train = scaling.transform(X_train)
X_test = scaling.transform(X_test)

# 700 got 4min 53s.
clf = SVC(C=1.0, gamma='auto', cache_size=700)

# Print out the model.
clf.fit(X_train, y_train)

#y_pred = clf.predict(X_test)

#print("Confusion Matrix:")
#print(metrics.confusion_matrix(y_test, y_pred)) 
#print('Precision Score:', metrics.precision_score(y_test, y_pred, average= 'weighted'))
#print('Recall Score:', metrics.recall_score(y_test, y_pred, average= 'weighted'))
#print('F1 Score:', metrics.f1_score(y_test, y_pred, average= 'weighted'))

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

## *Logistic Regression*

In [19]:
# Set a few optional parameter.
lr = LogisticRegression(solver='lbfgs', # Optimized waiting descent.
                        max_iter=200, 
                        multi_class='auto') 

# Print out the model.
lr.fit(X_train, y_train)

#y_pred = lr.predict(X_test)

#print("Confusion Matrix:")
#print(metrics.confusion_matrix(y_test, y_pred)) 
#print('Precision Score:', metrics.precision_score(y_test, y_pred, average= 'weighted'))
#print('Recall Score:', metrics.recall_score(y_test, y_pred, average= 'weighted'))
#print('F1 Score:', metrics.f1_score(y_test, y_pred, average= 'weighted'))

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=200, multi_class='auto',
          n_jobs=None, penalty='l2', random_state=None, solver='lbfgs',
          tol=0.0001, verbose=0, warm_start=False)

## *Neural Networks*

In [20]:
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Multi-layer Perception Model (mlp)
# hidden_layer_sizes: 3 hidden layers, each has 30 neurons.
# 10, 20, 30, or 100... as long as your machine can handle that. Keep as many layer as you want.

mlp = MLPClassifier(hidden_layer_sizes=(30,30,30), 
                    solver='adam', # Adam optimization algo. Variation of gradient descent.
                    max_iter=1000) # Create neural network with hidden layers. Go up to 1000 iterations. 

# Print out the model.
mlp.fit(X_train, y_train)

MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(30, 30, 30), learning_rate='constant',
       learning_rate_init=0.001, max_iter=1000, momentum=0.9,
       n_iter_no_change=10, nesterovs_momentum=True, power_t=0.5,
       random_state=None, shuffle=True, solver='adam', tol=0.0001,
       validation_fraction=0.1, verbose=False, warm_start=False)

In [21]:
# Output report.
# print(metrics.classification_report(y_test, y_pred))

#print("Confusion Matrix:")
#print(metrics.confusion_matrix(y_test, y_pred, labels=[0, 1]))
#print('Precision Score:', metrics.precision_score(y_test, y_pred, average= 'weighted'))
#print('Recall Score:', metrics.recall_score(y_test, y_pred, average= 'weighted'))
#print('F1 Score:', metrics.f1_score(y_test, y_pred, average= 'weighted'))

# Task 1.3
- **Train k-NN model.** In your report describe the features that you used for k-NN. **Perform parameter tuning on k-NN model. Apply 5-fold cross validation** and use **grid search** to find the best K value for k-NN model. Set scoring metric to F1 score (F-measure). Use the best K value identified from grid search to train your k-NN model. **Plot the F1 score against K value based on the results you achieved from grid search.**

## *Train k-NN Model*

In [22]:
# Create KNN classifier
knn = KNeighborsClassifier(n_neighbors = 12)
# Fit the classifier to the data
knn.fit(X_train,y_train)

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=None, n_neighbors=12, p=2,
           weights='uniform')

In [23]:
knn.score(X_test, y_test)

0.6161926723033564

In [None]:
%%time
# Instantiate the grid.

# Verbose: Verbosity. Higher it is, the more messages.
# Refit: Estimator using the best found parameters on the whole dataset.

# So, the maximum performance that can be obtain always is using n_jobs = -1. | None is default (1).
grid = GridSearchCV(knn, param_grid, cv=5, n_jobs=2, scoring='f1_weighted') # call grid search CV on my model. 5 fold validation (cv=5).

# Fit the classifier to the data.
# knn.fit(X_train,y_train)

grid.fit(X_train, y_train) # Fit the grid with data.
# train this model on your data. every value here to create a model. specify 49 different kvalue/models.

In [None]:
# View the complete results (list of named tuples).
means = grid.cv_results_['mean_test_score']

means

In [None]:
# Plot the results.
plt.plot(range(1,21),means)
plt.xlabel('Value of K for KNN')
plt.ylabel('F1 score based on Cross-Validation')
plt.show()

In [None]:
# Identify the best model.
print(grid.best_score_)
print(grid.best_params_)
print(grid.best_estimator_)

In [None]:
# Train model using all data and the best known parameters.
# Using best n_neighbors from grid search. Not finished.
knn = KNeighborsClassifier(n_neighbors=1, weights='uniform')
knn.fit(X_train, y_train)
y_pred =  knn.predict(X_test)

print(metrics.precision_score(y_test, y_pred, average= 'weighted'))
print(metrics.recall_score(y_test, y_pred, average= 'weighted'))
print(metrics.f1_score(y_test, y_pred, average= 'weighted'))
print(metrics.confusion_matrix(y_test, y_pred))

# Task 1.4
- **Using the test set**, compute the confusion matrix, the precision, recall and F-measure for (1) Decision Tree, (2) SVM, (3) Logistic Regression, (4) Neural Networks, and (5) k-NN. For k-NN model, use the best K value identified from grid search. **Compare their performance and include your conclusions in your report**

In [29]:
#Decision Tree
dtree_pred_class = dtree.predict(X_test)

dtreeConfusion = metrics.confusion_matrix(y_test, dtree_pred_class, labels=[0, 1])
dtreeRecall = metrics.recall_score(y_test, dtree_pred_class)
dtreePrecision = metrics.precision_score(y_test, dtree_pred_class)
dtreeF1 = metrics.f1_score(y_test, dtree_pred_class)

#confusion matrix
print("Decision tree confusion matrix:")
print(dtreeConfusion)
print("")
#precision
print("Decision tree precision:")
print(dtreePrecision)
print("")
#recall
print("Decision tree recall:")
print(dtreeRecall)
print("")
#f-measure
print("Decision tree F-measure:")
print(dtreeF1)

Decision tree confusion matrix:
[[ 894  827]
 [1176 1006]]

Decision tree precision:
0.5488270594653574

Decision tree recall:
0.461044912923923

Decision tree F-measure:
0.501120797011208


In [28]:
#SVM
SVM_pred_class = clf.predict(X_test)

SVMconfusion = metrics.confusion_matrix(y_test, SVM_pred_class, labels=[0, 1])
SVMrecall = metrics.recall_score(y_test, SVM_pred_class)
SVMprecision = metrics.precision_score(y_test, SVM_pred_class)
SVMf1 = metrics.f1_score(y_test, SVM_pred_class)

#confusion matrix
print("SVM confusion matrix:")
print(SVMconfusion)
print("")
#precision
print("SVM precision:")
print(SVMprecision)
print("")
#recall
print("SVM recall:")
print(SVMrecall)
print("")
#f-measure
print("SVM F-measure:")
print(SVMf1)

SVM confusion matrix:
[[ 436 1285]
 [  43 2139]]

SVM precision:
0.6247079439252337

SVM recall:
0.9802933088909258

SVM F-measure:
0.7631109525508384


In [30]:
#Logistic Regression
lr_pred_class = lr.predict(X_test)

lrConfusion = metrics.confusion_matrix(y_test, lr_pred_class, labels=[0, 1])
lrRecall = metrics.recall_score(y_test, lr_pred_class)
lrPrecision = metrics.precision_score(y_test, lr_pred_class)
lrF1 = metrics.f1_score(y_test, lr_pred_class)

#confusion matrix
print("Logistic regression confusion matrix:")
print(lrConfusion)
print("")
#precision
print("Logistic regression precision:")
print(lrPrecision)
print("")
#recall
print("Logistic regression recall:")
print(lrRecall)
print("")
#f-measure
print("Logistic regression F-measure:")
print(lrF1)

Logistic regression confusion matrix:
[[1262  459]
 [ 640 1542]]

Logistic regression precision:
0.7706146926536732

Logistic regression recall:
0.7066911090742438

Logistic regression F-measure:
0.7372699019842219


In [31]:
#Neural Networks
mlp_pred_class = mlp.predict(X_test)

mlpConfusion = metrics.confusion_matrix(y_test, mlp_pred_class, labels=[0, 1])
mlpRecall = metrics.recall_score(y_test, mlp_pred_class)
mlpPrecision = metrics.precision_score(y_test, mlp_pred_class)
mlpF1 = metrics.f1_score(y_test, mlp_pred_class)

#confusion matrix
print("Neural Networks confusion matrix:")
print(mlpConfusion)
print("")
#precision
print("Neural Networks precision:")
print(mlpPrecision)
print("")
#recall
print("Neural Networks recall:")
print(mlpRecall)
print("")
#f-measure
print("Neural Networks F-measure:")
print(mlpF1)

Neural Networks confusion matrix:
[[1123  598]
 [ 611 1571]]

Neural Networks precision:
0.7242969110189027

Neural Networks recall:
0.7199816681943172

Neural Networks F-measure:
0.7221328430245921
