In [1]:
# Dataset from Kaggle
# Data analysis for the new year resolutions

In [3]:
import pandas as pd
import numpy  as np
import plotly.express as px
import plotly.graph_objects as go

In [5]:
# Reading the dataset

In [7]:
# Load the semicolon-delimited CSV and preview a few random rows.
nyr_df = pd.read_csv('new_year_resolutions_dataset.csv', sep=';')
# Fixed seed so the preview shows the same rows on every Restart & Run All.
nyr_df.sample(5, random_state=42)

Unnamed: 0,resolution_topics,gender,name,resolution_category,retweet_count,text,tweet_coord,tweet_created,tweet_date,tweet_id,tweet_location,tweet_state,user_timezone,tweet_region
3017,Other,male,mutemor,Recreation & Leisure,0.0,hope the only New Years resolution I DO keep i...,,2015-01-02T08:09:00,2015-01-02,"5,51048E+17","Dallas, Tejas",TX,Central Time (US & Canada),South
2664,Improve my body,male,Pablo_F_Marini,Health & Fitness,1.0,RT @Stitchdolll: My new years resolution is to...,,2015-01-01T11:36:00,2015-01-01,"5,50737E+17","Long Island, New York",NY,Atlantic Time (Canada),Northeast
2719,Humor about Family/Friends/Relationships Resol...,male,yesreKmaS,Humor,0.0,My New Years resolution is to catch Drake goin...,,2015-01-01T11:30:00,2015-01-01,"5,50736E+17","Lookout Mountain, GA",GA,Indiana (East),South
2496,Other,female,angilovesdodgrs,Health & Fitness,0.0,2015 my New Years resolution is to gain 20 pou...,,2015-01-01T11:55:00,2015-01-01,"5,50742E+17","Chicago, Illinois",IL,Central Time (US & Canada),Midwest
2687,Improve my body,female,heylilyy,Health & Fitness,0.0,I'm so excited to start my New Years resolutio...,,2015-01-01T11:34:00,2015-01-01,"5,50737E+17","Ann Arbor, MI",MI,Eastern Time (US & Canada),Midwest


In [8]:
# Finding the most common resolution categories

In [13]:
# Donut chart of how many tweets fall into each resolution category.

# Tweets per category, largest group first.
categories_df = (
    nyr_df.groupby('resolution_category')
    .size()
    .reset_index(name='count')
    .sort_values('count', ascending=False)
)

# The pie trace; a non-zero `hole` turns it into a donut.
category_pie = go.Pie(
    labels=categories_df['resolution_category'],
    values=categories_df['count'],
    name="Resolution Categories",
    hole=0.75,
    marker_colors=px.colors.cyclical.Edge,
)

# Caption placed at (0.5, 0.5) with no arrow.
centre_caption = dict(
    text='Resolution Categories',
    x=0.5,
    y=0.5,
    font_size=20,
    showarrow=False,
)

fig = go.Figure()
fig.add_trace(category_pie)
fig.update_layout(title_text="Resolution Categories", annotations=[centre_caption])

fig.show()

In [14]:
# Plotting the Resolution Topics

# Number of topics shown in the chart.
TOP_N_TOPICS = 20

# Count tweets per topic only. Grouping by (category, topic) ranks pairs rather
# than topics, and a topic shared by several categories (e.g. 'Other') yields
# duplicate pie labels, which Plotly sums into a single slice -- so the chart
# could show fewer than TOP_N_TOPICS topics, with only partial totals.
topics_df = (
    nyr_df.groupby('resolution_topics')
    .size()
    .reset_index(name='count')
    .sort_values('count', ascending=False)
    .head(TOP_N_TOPICS)
)

fig = go.Figure()

# A non-zero `hole` turns the pie into a donut.
fig.add_trace(go.Pie(labels=topics_df['resolution_topics'],
                     values=topics_df['count'],
                     name="Resolution Topics",
                     hole=0.75,
                     marker_colors=px.colors.cyclical.Edge
                    )
             )

fig.update_layout(
    title_text=f"The top {TOP_N_TOPICS} Resolution Topics",
    # Caption placed at (0.5, 0.5) with no arrow.
    annotations=[dict(text='Resolution Topics',
                      x=0.5,
                      y=0.5,
                      font_size=20,
                      showarrow=False
                     )])

fig.show()

In [15]:
# What does the Top 100 Most Retweeted consist of?
# This measures popularity, which might not be what we are looking for - but at least we may have a good laugh

In [19]:
# Rank tweets by retweet count (highest first), keep the first 100 rows,
# then print the text of each one.
retweet_ranking = nyr_df.sort_values('retweet_count', ascending=False)
top100_df = retweet_ranking.head(100)
for tweet in top100_df['text']:
    print(tweet)

RT @TweetLikeAGirI: my only New Years resolution is to not spend money on food I honestly might be rich by 2016
RT @ltsChuckBass: New Years resolution: find a love like Chuck &amp; Blair's http://t.co/58f1D3t2aJ
The only New Years resolution I've really, really settled on is READ MORE BOOKS. All the kinds. All year.
RT @FRONZ1LLA: New Years resolution: don't be a bitch
RT @HulkHogan: New Years Resolution ,only people with love and joy will be in my life,I refuse to engage with confrontational or negative p??_
RT @FreakingTrue: My New Years resolution is simply going to be remembering to write 2015 instead of 2014
my only New Years resolution is to not spend money on food I honestly might be rich by 2016
RT @TheColIegeLife: New Years resolution: http://t.co/sqDwcpYFPF
RT @Vikkstar123: My New Years Resolution is 1080p :')
RT @daxshepard1: New Years Resolution #1: drive slow and patiently two days a week. #2 Stop talking shit about other people. What are yours?
RT @beautyscene_: New Years

In [17]:
# Tweets distribution across US states.

In [20]:
# Count tweets per US state and draw the counts on a choropleth map.

us_state_count = (
    nyr_df.groupby('tweet_state')
    .size()
    .reset_index(name='count_of_tweets')
)

# Passing the frame plus column names (rather than bare Series) lets Plotly
# label the hover text and colour bar; `labels` maps the column names to
# readable text.
fig = px.choropleth(us_state_count,
                    locations='tweet_state',
                    locationmode="USA-states",
                    color='count_of_tweets',
                    color_continuous_scale='viridis',
                    scope="usa",
                    labels={'tweet_state': 'State',
                            'count_of_tweets': 'No. of tweets'},
                    # Counts are computed from all of nyr_df, so the title
                    # carries no "Top 100" qualifier.
                    title='No. of tweets per US State'
                   )
fig.show()