In [2]:
import pandas as pd

# Sample weather readings for three Indian cities, one row per city.
india_weather = pd.DataFrame(
    [
        ("Mumbai", 28, 60),
        ("Delhi", 32, 80),
        ("Bangalore", 25, 70),
    ],
    columns=["City", "Temperature", "Humidity"],
)
india_weather

Unnamed: 0,City,Temperature,Humidity
0,Mumbai,28,60
1,Delhi,32,80
2,Bangalore,25,70


In [4]:
# Sample weather readings for three US cities, using the same columns as india_weather.
us_weather = pd.DataFrame(
    dict(
        City=["New York", "Chicago", "Orlando"],
        Temperature=[21, 14, 35],
        Humidity=[50, 60, 75],
    )
)
us_weather

Unnamed: 0,City,Temperature,Humidity
0,New York,21,50
1,Chicago,14,60
2,Orlando,35,75


In [5]:
# Stack the two frames row-wise. Each frame keeps its own index labels,
# so the result repeats the labels 0, 1, 2.
df = pd.concat(
    objs=[india_weather, us_weather],
)
df

Unnamed: 0,City,Temperature,Humidity
0,Mumbai,28,60
1,Delhi,32,80
2,Bangalore,25,70
0,New York,21,50
1,Chicago,14,60
2,Orlando,35,75


In [7]:
# ignore_index=True drops the per-frame labels and renumbers the
# stacked rows consecutively (0..5) instead of repeating 0, 1, 2.
df = pd.concat(
    [india_weather, us_weather],
    axis=0,
    ignore_index=True,
)
df

Unnamed: 0,City,Temperature,Humidity
0,Mumbai,28,60
1,Delhi,32,80
2,Bangalore,25,70
3,New York,21,50
4,Chicago,14,60
5,Orlando,35,75


In [9]:
# Write the concatenated frame to disk; index=True also writes the row labels
# as the first CSV column.
df.to_csv(path_or_buf="sai.csv", index=True)

In [11]:
# Label each input frame with a key so rows can be traced back to their source:
# the keys ("india", "us") become the outer level of a two-level row index.
# Passing a mapping to concat uses its keys as the `keys` argument.
df = pd.concat({"india": india_weather, "us": us_weather})
df

Unnamed: 0,Unnamed: 1,City,Temperature,Humidity
india,0,Mumbai,28,60
india,1,Delhi,32,80
india,2,Bangalore,25,70
us,0,New York,21,50
us,1,Chicago,14,60
us,2,Orlando,35,75


In [None]:
# .loc with an outer-level key selects every row stored under that key;
# here it returns the rows that were concatenated under the "us" label.

df.loc["us"]

Unnamed: 0,City,Temperature,Humidity
0,New York,21,50
1,Chicago,14,60
2,Orlando,35,75


In [24]:
# Temperature per city, with explicit row labels 0..2 (used later for index alignment).
temprature_df = pd.DataFrame(
    [
        ("Delhi", 25),
        ("Mumbai", 28),
        ("chennai", 32),
    ],
    columns=["city", "temperature"],
    index=[0, 1, 2],
)
temprature_df

Unnamed: 0,city,temperature
0,Delhi,25
1,Mumbai,28
2,chennai,32


In [27]:
# Wind speed per city. The row labels are deliberately [1, 0] so that each city
# carries the same label it has in temprature_df (Mumbai -> 1, Delhi -> 0).
windspeed_df = pd.DataFrame(
    [
        ("Mumbai", 45.0),
        ("Delhi", 30.0),
    ],
    columns=["city", "windspeed"],
    index=[1, 0],
)
windspeed_df

Unnamed: 0,city,windspeed
1,Mumbai,45.0
0,Delhi,30.0


In [28]:
# Place the two frames side by side. Rows are matched on index label, not position,
# so a label missing from one frame (here label 2) gets NaN in that frame's columns.
# Both inputs have a "city" column and both are kept in the result.
sf = pd.concat(
    [temprature_df, windspeed_df],
    axis="columns",
)
sf

Unnamed: 0,city,temperature,city.1,windspeed
0,Delhi,25,Delhi,30.0
1,Mumbai,28,Mumbai,45.0
2,chennai,32,,


In [29]:
# Rebuild the city/temperature frame (labels 0..2) as the base for adding a new column.
temprature_df = pd.DataFrame(
    data=dict(
        city=["Delhi", "Mumbai", "chennai"],
        temperature=[25, 28, 32],
    ),
    index=[0, 1, 2],
)
temprature_df

Unnamed: 0,city,temperature
0,Delhi,25
1,Mumbai,28
2,chennai,32


In [34]:
# A named Series can be attached to a DataFrame as a new column;
# its name ("event") is what the column will be called.
s = pd.Series(
    data=["humid", "hot", "rainy"],
    name="event",
)
print(s)

0    humid
1      hot
2    rainy
Name: event, dtype: object


In [33]:
# Append the Series as an extra column: rows are matched on index label and the
# Series name becomes the new column header.
pd.concat(
    [temprature_df, s],
    axis="columns",
)

Unnamed: 0,city,temperature,event
0,Delhi,25,humid
1,Mumbai,28,hot
2,chennai,32,rainy


## Merge DataFrame


In [None]:
import pandas as pd
# Left-hand table for the merge examples: temperature keyed by city.
df1 = pd.DataFrame(
    [
        ("newyork", 21),
        ("chicago", 23),
        ("orlando", 32),
    ],
    columns=["city", "temprature"],
)
df1

Unnamed: 0,city,temprature
0,newyork,21
1,chicago,23
2,orlando,32


In [42]:
# Right-hand table for the merge examples: humidity keyed by city.
# The cities are listed in a different row order than in df1.
df2 = pd.DataFrame(
    dict(
        city=["chicago", "newyork", "orlando"],
        humidity=[21, 23, 32],
    )
)
df2

Unnamed: 0,city,humidity
0,chicago,21
1,newyork,23
2,orlando,32


In [43]:
# Join the two tables on the shared "city" column; rows are paired by city value,
# not by row position.
df3 = df1.merge(df2, on="city")
df3

Unnamed: 0,city,temprature,humidity
0,newyork,21,23
1,chicago,23,21
2,orlando,32,32


In [None]:
import pandas as pd
# Temperature table: three cities.
df1 = pd.DataFrame(
    [
        ("newyork", 21),
        ("chicago", 23),
        ("orlando", 32),
    ],
    columns=["city", "temprature"],
)
# Humidity table: four cities, only two of which also appear in df1.
df2 = pd.DataFrame(
    [
        ("chicago", 21),
        ("newyork", 23),
        ("toronto", 32),
        ("sanfrancissco", 22),
    ],
    columns=["city", "humidity"],
)
# Outer join: keep every city from either table; a value missing on one side
# shows up as NaN in that side's column.
df3 = df1.merge(df2, on="city", how="outer")
df3

Unnamed: 0,city,temprature,humidity
0,chicago,23.0,21.0
1,newyork,21.0,23.0
2,orlando,32.0,
3,sanfrancissco,,22.0
4,toronto,,32.0


In [None]:
# Inner join (also the default when `how` is omitted): keep only the cities
# that appear in both tables.
df3 = df1.merge(df2, on="city", how="inner")
df3

Unnamed: 0,city,temprature,humidity
0,newyork,21,23
1,chicago,23,21


In [None]:
# Left join: keep every row of the left table (df1); cities with no match in
# df2 get NaN for the right table's columns.
df3 = df1.merge(df2, on="city", how="left")
df3

Unnamed: 0,city,temprature,humidity
0,newyork,21,23.0
1,chicago,23,21.0
2,orlando,32,


In [15]:
# Right join: keep every row of the right table (df2); cities with no match in
# df1 get NaN for the left table's columns.
df3 = df1.merge(df2, on="city", how="right")
df3

Unnamed: 0,city,temprature,humidity
0,chicago,23.0,21
1,newyork,21.0,23
2,toronto,,32
3,sanfrancissco,,22


In [None]:
# The indicator=True parameter adds a "_merge" column recording where each row
# came from: "both", "left_only" or "right_only".
df3 = df1.merge(
    df2,
    on="city",
    how="outer",
    indicator=True,
)
df3

Unnamed: 0,city,temprature,humidity,_merge
0,chicago,23.0,21.0,both
1,newyork,21.0,23.0,both
2,orlando,32.0,,left_only
3,sanfrancissco,,22.0,right_only
4,toronto,,32.0,right_only
