In [427]:
#importing libraries for EDA
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
from bokeh.io import show, output_notebook, output_file
from bokeh.plotting import figure
from bokeh.models import HoverTool, CategoricalColorMapper, ColumnDataSource
from bokeh.palettes import Paired7 as palette
from bokeh.palettes import Paired3

pd.options.display.max_columns = None

In [430]:
#configure bokeh so that show() renders figures inline in the notebook output
output_notebook()

### Exploratory Data Analysis (EDA)

### Categories File

This file lists the different categories that meetup groups fall into. There are a total of 33 different categories, and they cover a broad range: some are for Arts, some for Career building, some for Sports, and some simply for Socializing.

In [7]:
#exploring the categories csv file
#load the category lookup table and preview its first five rows
df_categories = pd.read_csv('categories.csv')
df_categories.head()

Unnamed: 0,category_id,category_name,shortname,sort_name
0,1,Arts & Culture,Arts,Arts & Culture
1,2,Career & Business,Business,Career & Business
2,3,Cars & Motorcycles,Auto,Cars & Motorcycles
3,4,Community & Environment,Community,Community & Environment
4,5,Dancing,Dancing,Dancing


In [10]:
#preview the last five rows of the category table
df_categories.tail()

Unnamed: 0,category_id,category_name,shortname,sort_name
28,31,Socializing,Social,Socializing
29,32,Sports & Recreation,Sports,Sports & Recreation
30,33,Support,Support,Support
31,34,Tech,Tech,Tech
32,36,Writing,Writing,Writing


In [9]:
#number of distinct category ids, reported as the shape tuple of the unique-values array
df_categories.category_id.unique().shape

(33,)

### Cities File

The data gathered from Kaggle predominantly focuses on three major cities: NYC, Chicago and San Francisco. However, there seems to be a city that was mistakenly queried because of its name: New York Mills, which appears in the table both for New York State and as a small city in Minnesota. Compared with the other cities, this query does not appear to have been intentional.

In [12]:
#load the cities csv file and display the resulting frame
df_cities = pd.read_csv('cities.csv')
df_cities

Unnamed: 0,city,city_id,country,distance,latitude,localized_country_name,longitude,member_count,ranking,state,zip
0,West New York,7093,us,2524.541,40.790001,USA,-74.010002,661,32,NJ,7093
1,New York,10001,us,2526.837,40.75,USA,-73.989998,229371,0,NY,10001
2,New York Mills,13417,us,2392.162,43.099998,USA,-75.290001,22,109,NY,13417
3,East Chicago,46312,us,1810.371,41.639999,USA,-87.459999,31,90,IN,46312
4,New York Mills,56567,us,1418.834,46.689999,USA,-95.349998,5,1,MN,56567
5,North Chicago,60064,us,1779.682,42.330002,USA,-87.860001,45,32,IL,60064
6,West Chicago,60185,us,1768.894,41.889999,USA,-88.199997,214,95,IL,60185
7,Chicago Heights,60411,us,1804.834,41.509998,USA,-87.610001,141,128,IL,60411
8,Chicago Ridge,60415,us,1793.115,41.700001,USA,-87.779999,77,178,IL,60415
9,Chicago,60601,us,1798.434,41.880001,USA,-87.620003,90918,0,IL,60290


### Group File

Looking at the unique values in the city column, it can be seen that New York Mills does not appear in the groups table. This is consistent with the assumption that the query for New York Mills was not intentional.

In [477]:
#load the groups csv file and preview its first five rows
df_groups = pd.read_csv('groups.csv')
df_groups.head()

Unnamed: 0,group_id,category_id,category.name,category.shortname,city_id,city,country,created,description,group_photo.base_url,group_photo.highres_link,group_photo.photo_id,group_photo.photo_link,group_photo.thumb_link,group_photo.type,join_mode,lat,link,lon,members,group_name,organizer.member_id,organizer.name,organizer.photo.base_url,organizer.photo.highres_link,organizer.photo.photo_id,organizer.photo.photo_link,organizer.photo.thumb_link,organizer.photo.type,rating,state,timezone,urlname,utc_offset,visibility,who
0,6388,14,health/wellbeing,health-wellbeing,10001,New York,US,2002-11-21 16:50:46,Those who practice or hold a strong interest i...,https://secure.meetupstatic.com,https://secure.meetupstatic.com/photos/event/1...,61087482,https://secure.meetupstatic.com/photos/event/1...,https://secure.meetupstatic.com/photos/event/1...,event,open,40.75,https://www.meetup.com/alternative-health-nyc/,-73.989998,1440,Alternative Health NYC,1513133,Joel E.,https://secure.meetupstatic.com,https://secure.meetupstatic.com/photos/member/...,3982960,https://secure.meetupstatic.com/photos/member/...,https://secure.meetupstatic.com/photos/member/...,member,4.39,NY,US/Eastern,alternative-health-nyc,-14400,public,Explorers of Health
1,6510,4,community/environment,community-environment,10001,New York,US,2003-05-20 14:48:54,The New York Alternative Energy Meetupis for t...,https://secure.meetupstatic.com,https://secure.meetupstatic.com/photos/event/a...,462764217,https://secure.meetupstatic.com/photos/event/a...,https://secure.meetupstatic.com/photos/event/a...,event,open,40.75,https://www.meetup.com/alternative-energy-meetup/,-73.989998,969,Alternative Energy Meetup,3955940,Yair Greenbaum,https://secure.meetupstatic.com,https://secure.meetupstatic.com/photos/member/...,7966960,https://secure.meetupstatic.com/photos/member/...,https://secure.meetupstatic.com/photos/member/...,member,4.31,NY,US/Eastern,alternative-energy-meetup,-14400,public,Clean Energy Supporters
2,8458,26,pets/animals,pets-animals,10001,New York,US,2004-03-27 09:55:41,not_found,https://secure.meetupstatic.com,https://secure.meetupstatic.com/photos/event/5...,431422360,https://secure.meetupstatic.com/photos/event/5...,https://secure.meetupstatic.com/photos/event/5...,event,open,40.73,https://www.meetup.com/Animals/,-73.989998,2930,NYC Animal Rights,1809940,Santos,https://secure.meetupstatic.com,https://secure.meetupstatic.com/photos/member/...,259015234,https://secure.meetupstatic.com/photos/member/...,https://secure.meetupstatic.com/photos/member/...,member,4.84,NY,US/Eastern,Animals,-14400,public,Animal Voices
3,8940,29,sci-fi/fantasy,sci-fi-fantasy,10001,New York,US,2002-11-16 04:49:16,Welcome to the The New York City Anime Meetup ...,https://secure.meetupstatic.com,https://secure.meetupstatic.com/photos/event/5...,2424355,https://secure.meetupstatic.com/photos/event/5...,https://secure.meetupstatic.com/photos/event/5...,event,open,40.75,https://www.meetup.com/NYC-Anime/,-73.989998,5080,The New York City Anime Group,2548151,Al Mejias,https://secure.meetupstatic.com,https://secure.meetupstatic.com/photos/member/...,1920491,https://secure.meetupstatic.com/photos/member/...,https://secure.meetupstatic.com/photos/member/...,member,4.46,NY,US/Eastern,NYC-Anime,-14400,public,Anime Fans
4,10104,26,pets/animals,pets-animals,10001,New York,US,2003-10-22 21:39:49,"We welcome those who support pits, even if you...",https://secure.meetupstatic.com,https://secure.meetupstatic.com/photos/event/9...,12940295,https://secure.meetupstatic.com/photos/event/9...,https://secure.meetupstatic.com/photos/event/9...,event,open,40.720001,https://www.meetup.com/NYC-Pitbull/,-74.0,2097,NYC Pit Bull Group,1929168,Amy,https://secure.meetupstatic.com,https://secure.meetupstatic.com/photos/member/...,1659284,https://secure.meetupstatic.com/photos/member/...,https://secure.meetupstatic.com/photos/member/...,member,4.09,NY,US/Eastern,NYC-Pitbull,-14400,public_limited,"NYC Pits & People, Dog Lovers"


In [7]:
#list the distinct city names that occur in the groups table
df_groups.city.unique()

array(['New York', 'Chicago', 'San Francisco', 'South San Francisco',
       'West Chicago', 'Chicago Ridge', 'Chicago Heights', 'West New York',
       'North Chicago'], dtype=object)

In [478]:
#splitting the groups in the dataset into their corresponding cities
#parse the creation timestamps (year-first strings) so they can be bucketed by year later on
df_groups['created'] = pd.to_datetime(df_groups.created, yearfirst=True)

#city-name variants that are rolled up into each metro area.
#'New York Mills' is deliberately left out of the New York list so that it matches the
#list used for the members split; no group in the dataset is located there, so the
#resulting counts are unaffected.
sf = ['San Francisco', 'san francisco', 'South San Francisco']
ny = ['New York', 'West New York']
chi = ['Chicago','Chicago Heights','West Chicago','Chicago Ridge','East Chicago','North Chicago','Chicago Park']

df_sfg = df_groups[df_groups.city.isin(sf)]
df_nyg = df_groups[df_groups.city.isin(ny)]
df_chig = df_groups[df_groups.city.isin(chi)]

#printing the number of groups in each city
print('Number of Groups in New York: {}'.format(len(df_nyg)))
print('Number of Groups in SF: {}'.format(len(df_sfg)))
print('Number of Groups in Chicago: {}'.format(len(df_chig)))

Number of Groups in New York: 8576
Number of Groups in SF: 4574
Number of Groups in Chicago: 3180


### What categories are popular in each of these three cities?

From the horizontal bar graphs shown below, it can be seen that each city has the same top 3 group categories: Tech, Career & Business, and Socializing. However, in the city of San Francisco, Health/Wellbeing is very close to Socializing.

In [433]:
def category_count(df, categories=None):
    """Count how many groups in ``df`` belong to each category.

    Parameters
    ----------
    df : pandas.DataFrame
        Group records; must contain a ``'category.name'`` column.
    categories : sequence, optional
        Category names to count, in the order the counts should be returned.
        When omitted, the distinct ``'category.name'`` values of the
        notebook-level ``df_groups`` frame are used, so that every city is
        counted against the same category list in the same order.

    Returns
    -------
    list of int
        One count per entry of ``categories``; categories that do not occur
        in ``df`` are reported as 0.
    """
    if categories is None:
        categories = list(df_groups['category.name'].unique())

    #tally the column once instead of re-filtering the whole frame for every category
    tally = df['category.name'].value_counts()

    return [int(tally.get(category, 0)) for category in categories]

In [434]:
#a horizontal bar graph per city showing how many groups fall into each category
from bokeh.layouts import column

categories = list(df_groups['category.name'].unique())


def _category_bar_figure(title, counts):
    """Build one horizontal bar chart of group counts per category.

    ``title`` is the figure title; ``counts`` holds one value per entry of the
    notebook-level ``categories`` list, in the same order.
    """
    fig = figure(y_range = categories, title = title, x_axis_label = 'Distribution',
                 y_axis_label = 'categories', plot_height = 400, plot_width = 800)
    fig.hbar(y = categories, right = counts, height = .8, left = 0)
    #a separate HoverTool is created for every figure
    fig.add_tools(HoverTool(tooltips=[('Category', '@y'), ('Count', '@right')]))
    return fig


p_sf = _category_bar_figure('Types of Groups in SF', category_count(df_sfg))
p_ny = _category_bar_figure('Types of Groups in NYC', category_count(df_nyg))
p_chi = _category_bar_figure('Types of Groups in Chicago', category_count(df_chig))

#stack the three charts vertically
layout = column(p_sf, p_ny, p_chi)
show(layout)


In [435]:
#stacked variant of the three per-city charts above: one bar per category,
#split into New York / San Francisco / Chicago segments
from bokeh.palettes import Paired3

categories = list(df_groups['category.name'].unique())

#one column of counts per city, aligned with the category list
source = ColumnDataSource(data = dict(
    category = categories,
    ny = category_count(df_nyg),
    sf = category_count(df_sfg),
    chi = category_count(df_chig),
))

p = figure(title = 'Count of Group Types', y_range = categories,
           x_axis_label = 'Distribution', y_axis_label = 'Categories',
           plot_width = 800, plot_height = 400)
p.hbar_stack(['ny', 'sf', 'chi'], y = 'category', height = .8, source = source,
             color = Paired3, legend = ['New York', 'San Francisco', 'Chicago'])
show(p)

### Which groups are the most popular in all three cities?

The top thirty groups are plotted below in descending order. The top five groups fall within the categories of outdoors/adventure, socializing, and career/business.

In [489]:
#getting the top 30 groups and the total number of members
df_temp = df_groups.sort_values(by='members', ascending = False)

#keep only the 30 largest groups, then flip the order so that the largest group comes
#last in every list (the lists feed a horizontal bar chart whose y_range is `name`)
top_groups = df_temp.head(30).iloc[::-1]

pop = (top_groups['members'] / 1000).tolist()    #member counts expressed in thousands
name = top_groups['group_name'].tolist()
category = top_groups['category.name'].tolist()
city = top_groups['city'].tolist()
rating = top_groups['rating'].tolist()

In [490]:
#horizontal bar chart of the 30 groups with the most members across all three cities
from bokeh.palettes import GnBu3

p_pop = figure(title = 'Top 30 Most Popular Groups', y_range = name,
               plot_height = 500, plot_width = 1000,
               x_axis_label = 'No. of Members(Thousands)', y_axis_label = 'Groups')

#the extra columns (category, city, rating) are only used by the hover tooltips
source = ColumnDataSource(data = dict(population = pop, name = name, category = category,
                                      city = city, rating = rating))
p_pop.hbar(y = 'name', right = 'population', height = .6, left = 0, source = source)

hover = HoverTool(tooltips=[('Category', '@category'), ('City', '@city'), ('Rating', '@rating')])
p_pop.add_tools(hover)
show(p_pop)

### Were the top three group types on meetup always this popular? Are the trends the same throughout different cities?

In all three cities, it can be seen that the increase in tech groups began in 2008 and is still increasing. However, both Socializing and Career/Business groups seem to be trending groups. In each city for the past two years, both of these groups have increased by roughly 100 groups.

In [106]:
def year_count(df, column, years=None):
    """Count the rows of ``df`` per calendar year of a datetime column.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the records to count.
    column : str
        Name of the column with the timestamps; it is passed through
        ``pd.to_datetime`` before the year is extracted.
    years : iterable of int, optional
        The years to report. When omitted, the notebook-level ``year`` array
        is used, which is what this function has always relied on.

    Returns
    -------
    dict
        Maps each requested year that occurs at least once to its row count.
        Years without any row are absent, and missing timestamps are ignored.
    """
    if years is None:
        #backward compatibility: fall back to the notebook-level `year` array
        years = year
    wanted = {int(y) for y in years}

    #a single vectorized tally replaces the row-by-row / year-by-year double loop
    per_year = pd.to_datetime(df[column]).dt.year.value_counts()

    return {int(y): int(n) for y, n in per_year.items() if y in wanted}

In [107]:
#this cell generates the data points for the increase in number of groups for the top three categories in each city.
topics = ['tech', 'career/business', 'socializing']
topic_count = []
year = np.arange(2002, 2018)

#topic_count is filled topic by topic and, within a topic, in the order SF, NY, Chicago:
#[tech-SF, tech-NY, tech-CHI, career-SF, career-NY, career-CHI, social-SF, social-NY, social-CHI]
#the plotting cell reads it by position, so this order must not change
for topic in topics:
    for df_city in (df_sfg, df_nyg, df_chig):
        dict_temp = year_count(df_city[df_city['category.name'] == topic], 'created')
        #years in which no group of this topic was created are recorded as 0
        topic_count.append([dict_temp.get(key, 0) for key in year])

In [438]:
#this cell generates a bokeh plot for the trend for each group in all three cities
from bokeh.layouts import row


def _topic_trend_figure(title, city_offset):
    """Build the per-year trend figure of the three top topics for one city.

    ``city_offset`` selects the city inside ``topic_count`` (0 = SF, 1 = NY,
    2 = Chicago); the series of topic ``i`` for that city sits at index
    ``3 * i + city_offset``.
    """
    fig = figure(title = title, plot_width = 400, plot_height = 500,
                 y_axis_label = 'Additional Groups', x_axis_label = 'Year')
    for topic_idx, label in enumerate(['Tech', 'Career/Business', 'Socializing']):
        counts = topic_count[3 * topic_idx + city_offset]
        fig.line(x = year, y = counts, legend = label, color = Paired3[topic_idx], line_width = 3)
        #white-filled markers on top of the line highlight the yearly data points
        fig.circle(year, counts, fill_color='white', size=4)
    fig.legend.location = 'top_left'
    return fig


p = _topic_trend_figure('SF Topic Trends', 0)
p1 = _topic_trend_figure('NY Topic Trends', 1)
p2 = _topic_trend_figure('Chicago Topic Trends', 2)

#show the three cities side by side
layout = row(p, p1, p2)
show(layout)

### Member File

Out of the three cities, New York City has the most members followed by San Francisco and Chicago. Also, the year from 2014 to 2015 was the year in which Meetup had the most members participating in groups in all three cities.

In [130]:
#read in the member file (decoded as latin-1)
df_members = pd.read_csv('members.csv', encoding = 'latin-1')
#parse the 'joined' timestamps (year-first) into datetimes so they can be counted per year
df_members['joined'] = pd.to_datetime(df_members['joined'], yearfirst = True)
#NOTE(review): the saved output under this cell shows rows 0-4, which is what head()
#would display rather than tail() - confirm which preview is intended
df_members.tail()

Unnamed: 0,member_id,bio,city,country,hometown,joined,lat,link,lon,member_name,state,member_status,visited,group_id
0,3,not_found,New York,us,"New York, NY",2007-05-01 22:04:37,40.72,http://www.meetup.com/members/3,-74.0,Matt Meeker,NY,active,2009-09-18 18:32:23,490552
1,3,not_found,New York,us,"New York, NY",2011-01-23 14:13:17,40.72,http://www.meetup.com/members/3,-74.0,Matt Meeker,NY,active,2011-03-20 01:02:11,1474611
2,3,"Hi, I'm Matt. I'm an entrepreneur who has star...",New York,us,"New York, NY",2010-12-30 18:47:34,40.72,http://www.meetup.com/members/3,-74.0,Matt Meeker,NY,active,2011-01-18 20:37:23,1490492
3,3,"Hi, I'm Matt. I'm an entrepreneur who has star...",New York,us,"New York, NY",2011-01-03 14:45:21,40.72,http://www.meetup.com/members/3,-74.0,Matt Meeker,NY,active,2011-07-23 03:42:28,1515830
4,3,"Hi, I'm Matt. I'm an entrepreneur who has star...",New York,us,"New York, NY",2010-12-30 18:34:50,40.72,http://www.meetup.com/members/3,-74.0,Matt Meeker,NY,active,2011-06-13 18:33:23,1574965


In [141]:
#assign every membership record to one of the three metro areas via its city name
sf = ['San Francisco', 'san francisco', 'South San Francisco']
ny = ['New York', 'West New York']
chi = ['Chicago','Chicago Heights','West Chicago','Chicago Ridge','East Chicago','North Chicago','Chicago Park']

df_sf = df_members.loc[df_members['city'].isin(sf)]
df_ny = df_members.loc[df_members['city'].isin(ny)]
df_chi = df_members.loc[df_members['city'].isin(chi)]

#a member has one row per group joined, so distinct member ids are counted rather than rows
print('Number of Members in New York: {}'.format(len(df_ny['member_id'].unique())))
print('Number of Members in SF: {}'.format(len(df_sf['member_id'].unique())))
print('Number of Members in Chicago: {}'.format(len(df_chi['member_id'].unique())))

Number of Members in New York: 580884
Number of Members in SF: 270996
Number of Members in Chicago: 236055


### What year had the most activity in these three cities?

The following line graph captures user activity for each city. It can be seen that the largest increase in activity, in terms of members joining groups, occurred between 2014 and 2015. This could be existing members joining additional groups or new members being introduced to meetup.com.

In [164]:
#generated the data for SF
#members_count collects one per-year series per city; the plot reads it by position
#(0 = SF, 1 = NYC, 2 = Chicago), so the three data cells must run in that order
members_count = []
dict_temp = year_count(df_sf, 'joined')

#one entry per year in `year`; a year without any join counts as 0 instead of raising KeyError
members = [dict_temp.get(key, 0) for key in year]

members_count.append(members)


  0%|          | 0/16 [00:00<?, ?it/s][A
100%|██████████| 16/16 [00:00<00:00, 11355.14it/s][A

In [169]:
#generated the data for NYC
#appended as the second series of members_count (run after the SF cell)
dict_temp = year_count(df_ny, 'joined')

#one entry per year in `year`; a year without any join counts as 0 instead of raising KeyError
members = [dict_temp.get(key, 0) for key in year]

members_count.append(members)


  0%|          | 0/16 [00:00<?, ?it/s][A
100%|██████████| 16/16 [00:00<00:00, 8819.67it/s][A

In [158]:
#generated the data for Chicago
#appended as the third series of members_count (run after the SF and NYC cells)
dict_temp = year_count(df_chi, 'joined')

#one entry per year in `year`; a year without any join counts as 0 instead of raising KeyError
members = [dict_temp.get(key, 0) for key in year]

members_count.append(members)


  0%|          | 0/16 [00:00<?, ?it/s][A
100%|██████████| 16/16 [00:00<00:00, 12851.18it/s][A

In [437]:
#line chart of how many group memberships were started per year in each city
from bokeh.palettes import YlGnBu3

p = figure(title = 'Members Joined Trends', plot_width = 500, plot_height = 500,
           x_axis_label = 'Year', y_axis_label = 'Additional Group Join Activity')

#members_count holds the yearly series in the order SF, NYC, Chicago
for idx, city_label in enumerate(['San Francisco', 'New York City', 'Chicago']):
    p.line(x = year, y = members_count[idx], legend = city_label, color = YlGnBu3[idx], line_width = 3)
    p.circle(year, members_count[idx], fill_color='white', size=4)

p.legend.location = 'top_left'
hover = HoverTool(tooltips=[('Joined Activity', '@y'), ('Year', '@x')])
p.add_tools(hover)
show(p)

### Events File

This file only contains events that were created from 2010 to 2017.

In [481]:
#load the events csv file and preview its first five rows
df_events = pd.read_csv('events.csv')
df_events.head()

Unnamed: 0,event_id,created,description,duration,event_url,fee.accepts,fee.amount,fee.currency,fee.description,fee.label,fee.required,group.created,group.group_lat,group.group_lon,group_id,group.join_mode,group.name,group.urlname,group.who,headcount,how_to_find_us,maybe_rsvp_count,event_name,photo_url,rating.average,rating.count,rsvp_limit,event_status,event_time,updated,utc_offset,venue.address_1,venue.address_2,venue.city,venue.country,venue_id,venue.lat,venue.localized_country_name,venue.lon,venue.name,venue.phone,venue.repinned,venue.state,venue.zip,visibility,waitlist_count,why,yes_rsvp_count
0,153868222,2013-12-03 21:24:29,Solve the murder case while eating a 3 course ...,20700,https://www.meetup.com/SanFranciscoStartupFun/...,others,0.0,not_found,per person,Price,0,2012-11-12 21:29:23,37.790001,-122.419998,5817262,open,San Francisco Startup Socials,SanFranciscoStartupFun,Members,0,We will be meeting OUTSIDE of the restaurant. ...,0,Murder Mystery Dinner Crawl. Dine at 3 Restaur...,not_found,0.0,-1,-1,upcoming,2017-10-30 02:30:00,2017-10-23 03:26:49,-25200,505 Broadway St.,not_found,San Francisco,us,23729697,37.79795,USA,-122.40569,Little Szechuan,-1,1,CA,-1,public,0,not_found,72
1,184167702,2014-05-20 18:52:00,NOTE: This event is EVERY FRIDAY!! Signup is a...,77400,https://www.meetup.com/hr-and-tech-sf/events/1...,others,0.0,not_found,per person,Price,0,2010-04-07 21:26:22,37.77,-122.410004,1627081,open,HR & Tech SF,hr-and-tech-sf,Members,0,not_found,0,Friday Night Drinks with International Travele...,not_found,0.0,-1,-1,upcoming,2017-10-28 04:00:00,2017-10-21 06:10:49,-25200,498 Broadway St,not_found,San Francisco,us,16948982,37.798172,USA,-122.405457,Horizon Lounge,-1,0,CA,-1,public,0,not_found,179
2,215200502,2014-10-23 16:18:44,We'll be heading out with a group of travelers...,37800,https://www.meetup.com/hr-and-tech-sf/events/2...,others,0.0,not_found,per person,Price,0,2010-04-07 21:26:22,37.77,-122.410004,1627081,open,HR & Tech SF,hr-and-tech-sf,Members,0,not_found,0,"Dinner, Comedy, 100 Beers & Debauchery!",not_found,0.0,-1,-1,upcoming,2017-10-27 03:00:00,2017-10-20 03:48:29,-25200,631 broadway st.,not_found,San Francisco,us,24717469,37.797516,USA,-122.407394,taste of vietnam noodle bar,-1,0,CA,-1,public,0,not_found,45
3,220826782,2015-02-28 19:27:32,NOTE: This event is EVERY FRIDAY!! Signup is a...,113400,https://www.meetup.com/SanFranciscoStartupFun/...,others,0.0,not_found,per person,Price,0,2012-11-12 21:29:23,37.790001,-122.419998,5817262,open,San Francisco Startup Socials,SanFranciscoStartupFun,Members,0,not_found,0,Friday Night Drinks,not_found,0.0,-1,-1,upcoming,2017-10-28 04:00:00,2017-10-21 06:11:09,-25200,498 Broadway,not_found,San Francisco,us,724783,37.798036,USA,-122.405442,Horizon,4155761118,0,CA,-1,public,0,not_found,198
4,227948102,2016-01-08 21:35:40,"Aussie Aussie Aussie, Oi Oi Oi! Join us for th...",36000,https://www.meetup.com/hr-and-tech-sf/events/2...,others,0.0,not_found,per person,Price,0,2010-04-07 21:26:22,37.77,-122.410004,1627081,open,HR & Tech SF,hr-and-tech-sf,Members,0,not_found,0,AUSTRALIA DAY PARTY & Sausage Sizzle! $3 Beer/...,not_found,0.0,-1,-1,upcoming,2018-01-26 01:00:00,2017-01-26 07:40:31,-28800,490 Broadway,not_found,San Francisco,US,20984572,37.79805,USA,-122.405251,Score! Bar and Lounge,-1,0,not_found,-1,public,0,not_found,24


In [491]:
#total number of event rows, and number of distinct groups (group_id) those events belong to
len(df_events), len(df_events.group_id.unique())

(5807, 341)

### Which groups were the most active in the past 7 years? How does this compare with the most popular groups?

The most active group over the past 7 years is Chicago Area District 30 Toastmasters, with a total of 1,061 events. The only group that appears among both the most popular and the most active groups in these three cities is Chicago Meetup. This shows that the most popular groups are not necessarily the most active ones.

In [482]:
#count the events each group created over the past 7 years, attach that count to the
#group records (inner join, so only groups with at least one event remain) and rank the
#groups from most to fewest events
df_copy = (
    df_groups
    .join(
        df_events.groupby('group_id')['created'].count(),
        on = 'group_id', how = 'inner', lsuffix = '_left', rsuffix = '_count',
    )
    .sort_values(by = 'created_count', ascending = False)
)
df_copy.head()

Unnamed: 0,group_id,category_id,category.name,category.shortname,city_id,city,country,created_left,description,group_photo.base_url,group_photo.highres_link,group_photo.photo_id,group_photo.photo_link,group_photo.thumb_link,group_photo.type,join_mode,lat,link,lon,members,group_name,organizer.member_id,organizer.name,organizer.photo.base_url,organizer.photo.highres_link,organizer.photo.photo_id,organizer.photo.photo_link,organizer.photo.thumb_link,organizer.photo.type,rating,state,timezone,urlname,utc_offset,visibility,who,created_count
7053,18472146,2,career/business,career-business,60601,Chicago,US,2015-03-02 18:07:52,Interviewing for jobs? Giving a business prese...,https://secure.meetupstatic.com,https://secure.meetupstatic.com/photos/event/b...,434868568,https://secure.meetupstatic.com/photos/event/b...,https://secure.meetupstatic.com/photos/event/b...,event,open,41.889999,https://www.meetup.com/Chicago-Area-D30-Toastm...,-87.629997,259,Chicago Area District 30 Toastmasters,185191174,D30 Toastmasters,https://secure.meetupstatic.com,https://secure.meetupstatic.com/photos/member/...,244496256,https://secure.meetupstatic.com/photos/member/...,https://secure.meetupstatic.com/photos/member/...,member,5.0,IL,US/Central,Chicago-Area-D30-Toastmasters,-18000,public,Members,1061
314,234020,1,fine arts/culture,arts-culture,94101,San Francisco,US,2006-04-17 04:50:20,The San Francisco Figure Drawing group has mul...,https://secure.meetupstatic.com,https://secure.meetupstatic.com/photos/event/f...,228663982,https://secure.meetupstatic.com/photos/event/f...,https://secure.meetupstatic.com/photos/event/f...,event,open,37.779999,https://www.meetup.com/SanFranciscoFigureDrawing/,-122.419998,4035,The San Francisco Figure Drawing Group,3360874,Johnny O'Brady,https://secure.meetupstatic.com,https://secure.meetupstatic.com/photos/member/...,79556892,https://secure.meetupstatic.com/photos/member/...,https://secure.meetupstatic.com/photos/member/...,member,4.72,CA,US/Pacific,SanFranciscoFigureDrawing,-25200,public,"Artists Strong! DRAW, DRAW, DRAW",298
2752,2148441,2,career/business,career-business,94101,San Francisco,US,2011-07-10 02:47:16,Throughout the week the SF Free School offers ...,https://secure.meetupstatic.com,https://secure.meetupstatic.com/photos/event/2...,131470092,https://secure.meetupstatic.com/photos/event/2...,https://secure.meetupstatic.com/photos/event/2...,event,open,37.759998,https://www.meetup.com/SF-Free-School/,-122.440002,13694,SF Free School,12713693,Logan (mylo) Johnston,https://secure.meetupstatic.com,not_found,27577702,https://secure.meetupstatic.com/photos/member/...,https://secure.meetupstatic.com/photos/member/...,member,4.6,CA,US/Pacific,SF-Free-School,-25200,public,Freeschoolers,134
1543,1475712,1,fine arts/culture,arts-culture,60601,Chicago,US,2009-06-19 02:40:19,Chicago Meetup is Chicago's Meetup Group for c...,https://secure.meetupstatic.com,https://secure.meetupstatic.com/photos/event/a...,231824302,https://secure.meetupstatic.com/photos/event/a...,https://secure.meetupstatic.com/photos/event/a...,event,open,41.900002,https://www.meetup.com/ChicagoMeetup/,-87.639999,18512,Chicago Meetup,90978062,Ray,https://secure.meetupstatic.com,https://secure.meetupstatic.com/photos/member/...,113480322,https://secure.meetupstatic.com/photos/member/...,https://secure.meetupstatic.com/photos/member/...,member,4.61,IL,US/Central,ChicagoMeetup,-18000,public,Members,114
8523,19150943,1,fine arts/culture,arts-culture,94101,San Francisco,US,2015-11-24 14:23:56,we meet twice a week and on Tuesdays we use fe...,https://secure.meetupstatic.com,https://secure.meetupstatic.com/photos/event/8...,446495048,https://secure.meetupstatic.com/photos/event/8...,https://secure.meetupstatic.com/photos/event/8...,event,open,37.740002,https://www.meetup.com/CASTRO-STREET-Drawing-s...,-122.459999,529,CASTRO STREET Drawing sessions(female model ni...,16400541,Thomasina DeMaio,https://secure.meetupstatic.com,https://secure.meetupstatic.com/photos/member/...,247911511,https://secure.meetupstatic.com/photos/member/...,https://secure.meetupstatic.com/photos/member/...,member,4.0,CA,US/Pacific,CASTRO-STREET-Drawing-sessions-female-model-night,-25200,public,"artists,life sketchers",105


In [483]:
#retrieving data to plot the following bar graph
#df_copy is already sorted by event count (descending): keep the 30 most active groups
#and flip the order so that the most active group comes last in every list
top_active = df_copy.head(30).iloc[::-1]

num = top_active['created_count'].tolist()
group = top_active['group_name'].tolist()
category = top_active['category.name'].tolist()
city = top_active['city'].tolist()
rating = top_active['rating'].tolist()

In [488]:
#bar chart of the 30 groups that hosted the most events in the past 7 years,
#displayed above the most-popular-groups chart for comparison
from bokeh.palettes import GnBu3
from bokeh.layouts import column

#Most active groups
p = figure(title = 'Most Active Groups', y_range = group, plot_width = 1000, plot_height = 500,
           x_axis_label = 'No. Events Hosted', y_axis_label = 'Groups')

#rating, city and category are only used by the hover tooltips
source = ColumnDataSource(data = dict(number = num, name = group, category = category,
                                      city = city, rating = rating))
p.hbar(y = 'name', right = 'number', height = .6, left = 0, source = source)

hover = HoverTool(tooltips=[('Rating', '@rating'),('City', '@city'), ('Category', '@category'),('Events Hosted', '@number')])
p.add_tools(hover)

layout = column(p, p_pop)
show(layout)

In [487]:
#print the groups that appear in both the top-30 most popular list (`name`)
#and the top-30 most active list (`group`)
for i in name:
    if i in group:
        print(i)

100%|██████████| 30/30 [00:00<00:00, 1975.49it/s]

Chicago Meetup



