## **Concatenation and Merging Datasets**

In [2]:
import numpy as np
import pandas as pd

In [3]:
# Concatenating two datasets: read both CSV parts, then stack them row-wise.
groups1, groups2 = map(pd.read_csv, ("data/groups1.csv", "data/groups2.csv"))

# ignore_index=True discards each part's own row labels and renumbers the
# combined frame from 0.
groups = pd.concat([groups1, groups2], ignore_index=True)
groups

Unnamed: 0,group_id,name,category_id,city_id
0,6388,Alternative Health NYC,14,10001
1,6510,Alternative Energy Meetup,4,10001
2,8458,NYC Animal Rights,26,10001
3,8940,The New York City Anime Group,29,10001
4,10104,NYC Pit Bull Group,26,10001
...,...,...,...,...
16325,26377464,Shinect,34,94101
16326,26377698,The art of getting what you want [conference s...,14,94101
16327,26378067,Streeterville Running Group,9,60601
16328,26378128,Just Dance NYC,23,10001


In [4]:
# Lookup tables that the merge cells below join onto `groups`.
cities, categories = (
    pd.read_csv(csv_path)
    for csv_path in ("data/cities.csv", "data/categories.csv")
)

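- Outer   = A ∪ B        --- all rows from both tables; columns from the side with no match are filled with NaN
- Inner   = A ∩ B        --- only rows whose key is present in both tables
- Left    = all of A     --- every row of A, plus the matching columns from B (NaN where B has no match)
- Right   = all of B     --- every row of B, plus the matching columns from A (NaN where A has no match)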

In [None]:
# Left join: keep every row of `groups` and attach the columns of `categories`
# where category_id matches; rows without a match get NaN in those columns.
pd.merge(groups, categories, how="left", on="category_id")

Unnamed: 0,group_id,name,category_id,city_id,category_name
0,6388,Alternative Health NYC,14,10001,Health & Wellbeing
1,6510,Alternative Energy Meetup,4,10001,Community & Environment
2,8458,NYC Animal Rights,26,10001,
3,8940,The New York City Anime Group,29,10001,Sci-Fi & Fantasy
4,10104,NYC Pit Bull Group,26,10001,
...,...,...,...,...,...
16325,26377464,Shinect,34,94101,
16326,26377698,The art of getting what you want [conference s...,14,94101,Health & Wellbeing
16327,26378067,Streeterville Running Group,9,60601,Fitness
16328,26378128,Just Dance NYC,23,10001,Outdoors & Adventure


In [8]:
# Right join: keep every row of `categories` and attach the matching rows of
# `groups`; a category with no group gets NaN in the group columns.
pd.merge(groups, categories, how="right", on="category_id")

Unnamed: 0,group_id,name,category_id,city_id,category_name
0,153288.0,Chicago Theater Goers Meetup Group,1,60601.0,Arts & Culture
1,178198.0,Union Square Reading Group,1,10001.0,Arts & Culture
2,234020.0,The San Francisco Figure Drawing Group,1,94101.0,Arts & Culture
3,292067.0,The San Francisco Play Reading Meetup Group,1,94101.0,Arts & Culture
4,297702.0,NEW YORK MUSEUM CLUB,1,10001.0,Arts & Culture
...,...,...,...,...,...
8033,26231454.0,We Love Diverse Stories,36,10001.0,Writing
8034,26246909.0,"Writing to Heal, Writing to Grow",36,10001.0,Writing
8035,26265769.0,Query Critique Group For Aspiring Authors,36,10001.0,Writing
8036,26318734.0,San Francisco UX Writers Meetup,36,94101.0,Writing


In [None]:
# Outer join: all rows from both tables, matched on category_id where possible;
# whichever side has no match is filled with NaN.
pd.merge(groups, categories, how="outer", on="category_id")

Unnamed: 0,group_id,name,category_id,city_id,category_name
0,153288.0,Chicago Theater Goers Meetup Group,1,60601.0,Arts & Culture
1,178198.0,Union Square Reading Group,1,10001.0,Arts & Culture
2,234020.0,The San Francisco Figure Drawing Group,1,94101.0,Arts & Culture
3,292067.0,The San Francisco Play Reading Meetup Group,1,94101.0,Arts & Culture
4,297702.0,NEW YORK MUSEUM CLUB,1,10001.0,Arts & Culture
...,...,...,...,...,...
16326,26231454.0,We Love Diverse Stories,36,10001.0,Writing
16327,26246909.0,"Writing to Heal, Writing to Grow",36,10001.0,Writing
16328,26265769.0,Query Critique Group For Aspiring Authors,36,10001.0,Writing
16329,26318734.0,San Francisco UX Writers Meetup,36,94101.0,Writing


In [None]:
# Inner join: only rows whose category_id value is present in both tables.
pd.merge(groups, categories, how="inner", on="category_id")

Unnamed: 0,group_id,name,category_id,city_id,category_name
0,6388,Alternative Health NYC,14,10001,Health & Wellbeing
1,6510,Alternative Energy Meetup,4,10001,Community & Environment
2,8940,The New York City Anime Group,29,10001,Sci-Fi & Fantasy
3,10359,NYC International Arabic Language & Culture Club,16,10001,Language & Ethnic Identity
4,12111,The New York City American Sign Language Meetu...,16,10001,Language & Ethnic Identity
...,...,...,...,...,...
8032,26374579,Digital Film Academy Production Pitch,20,10001,Movies & Film
8033,26374655,Vocabulary workhop,16,94101,Language & Ethnic Identity
8034,26377698,The art of getting what you want [conference s...,14,94101,Health & Wellbeing
8035,26378067,Streeterville Running Group,9,60601,Fitness


In [9]:
# The key has a different name in each table (groups.city_id vs cities.id),
# so it is given per side with left_on / right_on; both key columns are kept.
pd.merge(groups, cities, how="inner", left_on="city_id", right_on="id")

Unnamed: 0,group_id,name,category_id,city_id,id,city,state,zip
0,6388,Alternative Health NYC,14,10001,10001,New York,NY,10001
1,6510,Alternative Energy Meetup,4,10001,10001,New York,NY,10001
2,8458,NYC Animal Rights,26,10001,10001,New York,NY,10001
3,8940,The New York City Anime Group,29,10001,10001,New York,NY,10001
4,10104,NYC Pit Bull Group,26,10001,10001,New York,NY,10001
...,...,...,...,...,...,...,...,...
16325,26377464,Shinect,34,94101,94101,San Francisco,CA,94101
16326,26377698,The art of getting what you want [conference s...,14,94101,94101,San Francisco,CA,94101
16327,26378067,Streeterville Running Group,9,60601,60601,Chicago,IL,60290
16328,26378128,Just Dance NYC,23,10001,10001,New York,NY,10001


In [14]:
# Outer join on city; indicator=True adds a `_merge` column telling where each
# row came from: "left_only", "right_only" or "both".
pd.merge(
    groups,
    cities,
    how="outer",
    left_on="city_id",
    right_on="id",
    indicator=True,
)

Unnamed: 0,group_id,name,category_id,city_id,id,city,state,zip,_merge
0,19525264.0,West NY | Hoboken | Social Group 20s & 30s,31.0,7093.0,7093,West New York,NJ,7093,both
1,20240670.0,Nerdy NJ/NY,11.0,7093.0,7093,West New York,NJ,7093,both
2,20395344.0,The Founding Moms' Exchange: North Jersey,2.0,7093.0,7093,West New York,NJ,7093,both
3,21130076.0,West New York Playdates for little ones,25.0,7093.0,7093,West New York,NJ,7093,both
4,21825714.0,Women~Wellness~Wisdom,14.0,7093.0,7093,West New York,NJ,7093,both
...,...,...,...,...,...,...,...,...,...
16329,26373602.0,Earndotcom: SF's most awesome digital currency...,34.0,94101.0,94101,San Francisco,CA,94101,both
16330,26374655.0,Vocabulary workhop,16.0,94101.0,94101,San Francisco,CA,94101,both
16331,26377464.0,Shinect,34.0,94101.0,94101,San Francisco,CA,94101,both
16332,26377698.0,The art of getting what you want [conference s...,14.0,94101.0,94101,San Francisco,CA,94101,both


In [15]:
# Left join on city: every row of `groups` is kept; `_merge` reports whether a
# matching city was found ("both") or not ("left_only").
pd.merge(
    groups,
    cities,
    how="left",
    left_on="city_id",
    right_on="id",
    indicator=True,
)

Unnamed: 0,group_id,name,category_id,city_id,id,city,state,zip,_merge
0,6388,Alternative Health NYC,14,10001,10001,New York,NY,10001,both
1,6510,Alternative Energy Meetup,4,10001,10001,New York,NY,10001,both
2,8458,NYC Animal Rights,26,10001,10001,New York,NY,10001,both
3,8940,The New York City Anime Group,29,10001,10001,New York,NY,10001,both
4,10104,NYC Pit Bull Group,26,10001,10001,New York,NY,10001,both
...,...,...,...,...,...,...,...,...,...
16325,26377464,Shinect,34,94101,94101,San Francisco,CA,94101,both
16326,26377698,The art of getting what you want [conference s...,14,94101,94101,San Francisco,CA,94101,both
16327,26378067,Streeterville Running Group,9,60601,60601,Chicago,IL,60290,both
16328,26378128,Just Dance NYC,23,10001,10001,New York,NY,10001,both


In [16]:
# Right join on city: every row of `cities` is kept; `_merge` reports whether a
# matching group was found ("both") or not ("right_only").
pd.merge(
    groups,
    cities,
    how="right",
    left_on="city_id",
    right_on="id",
    indicator=True,
)

Unnamed: 0,group_id,name,category_id,city_id,id,city,state,zip,_merge
0,19525264.0,West NY | Hoboken | Social Group 20s & 30s,31.0,7093.0,7093,West New York,NJ,7093,both
1,20240670.0,Nerdy NJ/NY,11.0,7093.0,7093,West New York,NJ,7093,both
2,20395344.0,The Founding Moms' Exchange: North Jersey,2.0,7093.0,7093,West New York,NJ,7093,both
3,21130076.0,West New York Playdates for little ones,25.0,7093.0,7093,West New York,NJ,7093,both
4,21825714.0,Women~Wellness~Wisdom,14.0,7093.0,7093,West New York,NJ,7093,both
...,...,...,...,...,...,...,...,...,...
16329,26373602.0,Earndotcom: SF's most awesome digital currency...,34.0,94101.0,94101,San Francisco,CA,94101,both
16330,26374655.0,Vocabulary workhop,16.0,94101.0,94101,San Francisco,CA,94101,both
16331,26377464.0,Shinect,34.0,94101.0,94101,San Francisco,CA,94101,both
16332,26377698.0,The art of getting what you want [conference s...,14.0,94101.0,94101,San Francisco,CA,94101,both


In [17]:
# Inner join on city: only matched rows survive, so `_merge` is always "both".
pd.merge(
    groups,
    cities,
    how="inner",
    left_on="city_id",
    right_on="id",
    indicator=True,
)

Unnamed: 0,group_id,name,category_id,city_id,id,city,state,zip,_merge
0,6388,Alternative Health NYC,14,10001,10001,New York,NY,10001,both
1,6510,Alternative Energy Meetup,4,10001,10001,New York,NY,10001,both
2,8458,NYC Animal Rights,26,10001,10001,New York,NY,10001,both
3,8940,The New York City Anime Group,29,10001,10001,New York,NY,10001,both
4,10104,NYC Pit Bull Group,26,10001,10001,New York,NY,10001,both
...,...,...,...,...,...,...,...,...,...
16325,26377464,Shinect,34,94101,94101,San Francisco,CA,94101,both
16326,26377698,The art of getting what you want [conference s...,14,94101,94101,San Francisco,CA,94101,both
16327,26378067,Streeterville Running Group,9,60601,60601,Chicago,IL,60290,both
16328,26378128,Just Dance NYC,23,10001,10001,New York,NY,10001,both
