In [19]:
# In this tutorial we are going to learn about Pandas concatenate function
# Concatenate is an opreation that you do when you want to join two or more dataframes

In [20]:
import pandas as pd

In [21]:
india_weather = pd.DataFrame({
    "city": ["mumbai","delhi","banglore"],
    "temperature": [32,45,30],
    "humidity": [80, 60, 78]
})
india_weather

Unnamed: 0,city,temperature,humidity
0,mumbai,32,80
1,delhi,45,60
2,banglore,30,78


In [22]:
us_weather = pd.DataFrame({
    "city": ["new york","chicago","orlando"],
    "temperature": [21,14,35],
    "humidity": [68, 65, 75]
})
us_weather

Unnamed: 0,city,temperature,humidity
0,new york,21,68
1,chicago,14,65
2,orlando,35,75


In [23]:
# Now I want to join these two dataframes together so I can have a single dtaframe, so I'll use concat function
# You can pass more than two dataframes
df = pd.concat([india_weather,us_weather])

In [24]:
# The problem here you can see here is that it's using the index from the orginal dataset
df.sort_index()
# You can also see that you can't use sort index function here for continious index

Unnamed: 0,city,temperature,humidity
0,mumbai,32,80
0,new york,21,68
1,delhi,45,60
1,chicago,14,65
2,banglore,30,78
2,orlando,35,75


In [25]:
# You can pass an argument here called ignore_index
df = pd.concat([india_weather,us_weather], ignore_index=True)
df

Unnamed: 0,city,temperature,humidity
0,mumbai,32,80
1,delhi,45,60
2,banglore,30,78
3,new york,21,68
4,chicago,14,65
5,orlando,35,75


In [26]:
# You can also pass keys , let's say I have this dataframe here
# You can associate a key with each of these dataframes that you have passed in the list of concat
# It doesn't work with ignore index so you have to remove that first
df = pd.concat([india_weather,us_weather],keys=["india", 'us'])
df

# It created an additonal index for your subset of dataset

Unnamed: 0,Unnamed: 1,city,temperature,humidity
india,0,mumbai,32,80
india,1,delhi,45,60
india,2,banglore,30,78
us,0,new york,21,68
us,1,chicago,14,65
us,2,orlando,35,75


In [27]:
# The way you can use this index here is very simple
df.loc['india']
# Now you can retrieve a subset of your dataset now

Unnamed: 0,city,temperature,humidity
0,mumbai,32,80
1,delhi,45,60
2,banglore,30,78


In [28]:
# Sometimes you might have a case where you just wanted to append the 2nd dataframe as columns instead of rows
# let's look at that type of usecase
temperature_df = pd.DataFrame({
    "city": ["mumbai","delhi","banglore"],
    "temperature": [32,45,30],
}, index=[0,1,2])

temperature_df

Unnamed: 0,city,temperature
0,mumbai,32
1,delhi,45
2,banglore,30


In [29]:
windspeed_df = pd.DataFrame({
    "city": ["delhi","mumbai", 'banglore'],
    "windspeed": [7,12,9],
})

windspeed_df

Unnamed: 0,city,windspeed
0,delhi,7
1,mumbai,12
2,banglore,9


In [31]:
# Now if you see here it's just gonna append the second dataframe as rows which will give us NaN values
pd.concat([temperature_df, windspeed_df])

Unnamed: 0,city,temperature,windspeed
0,mumbai,32.0,
1,delhi,45.0,
2,banglore,30.0,
0,delhi,,7.0
1,mumbai,,12.0
2,banglore,,9.0


In [33]:
# Now when you append this dataframe, ideally you want to see winspeed as column not like the above cell

In [32]:
# In order to resolve this issue yu can use the axis arguement
pd.concat([temperature_df, windspeed_df], axis=1)
# If you see the documentation it is by default on axis = 0 and it's just append the data as rows
# but when you change axis = 1 it's now gonna append them as columns

Unnamed: 0,city,temperature,city.1,windspeed
0,mumbai,32,delhi,7
1,delhi,45,mumbai,12
2,banglore,30,banglore,9


In [34]:
# Realisticly talking data is not always perfect so let's say that cities are not in order 
    
windspeed_df = ({"city": ["mumbai","delhi"],
    "windspeed": [7,12],
})

windspeed_df

{'city': ['mumbai', 'delhi'], 'windspeed': [7, 12]}

In [None]:
# to resolve the issue you can use index funxtion
# The output will not look good because it'll just append as rows

In [38]:
# In order to oderly arrange your data you'll us the index funtion
windspeed_df = pd.DataFrame({
    "city": ["mumbai","delhi"],
    "windspeed": [7,12],
}, index=[0,1])

windspeed_df

Unnamed: 0,city,windspeed
0,mumbai,7
1,delhi,12


In [39]:
# We can also join a dataframe with a sereis
temperature_df

Unnamed: 0,city,temperature
0,mumbai,32
1,delhi,45
2,banglore,30


In [48]:
# Now this how I have created my series and this what I want to append it 
s = pd.Series(['Humid', 'Dry', 'Rain'], name='event')
s

0    Humid
1      Dry
2     Rain
Name: event, dtype: object

In [49]:
# You can do it using the following code
# Here you will say axis = 1 too becuase you want to have event as column not rows 
df = pd.concat([temperature_df, s],axis=1)
df

Unnamed: 0,city,temperature,event
0,mumbai,32,Humid
1,delhi,45,Dry
2,banglore,30,Rain
