# DataFrames Manipulations

In [1]:
import pandas as pd

## Create Weather datasets

In [2]:
# Sample frame for three European towns with `temp` and `rain` columns.
eu_weather_df = pd.DataFrame(
    dict(
        town=["Atina", "Oslo", "London"],
        temp=[35, 21, 25],
        rain=[False, False, True],
    )
)
eu_weather_df

Unnamed: 0,town,temp,rain
0,Atina,35,False
1,Oslo,21,False
2,London,25,True


In [3]:
# Sample frame for three Bulgarian towns, with the same columns as eu_weather_df.
bg_weather_df = pd.DataFrame(
    dict(
        town=["Sofia", "Sandanski", "Pleven"],
        temp=[25, 32, 21],
        rain=[False, False, True],
    )
)
bg_weather_df

Unnamed: 0,town,temp,rain
0,Sofia,25,False
1,Sandanski,32,False
2,Pleven,21,True


## Re-arrange columns

In [4]:
# Selecting with a list of labels returns the columns in the listed order.
eu_weather_df = eu_weather_df.loc[:, ["temp", "rain", "town"]]
eu_weather_df


Unnamed: 0,temp,rain,town
0,35,False,Atina
1,21,False,Oslo
2,25,True,London


In [5]:
# Put the columns back in their original order:
eu_weather_df = eu_weather_df.loc[:, ["town", "temp", "rain"]]
eu_weather_df


Unnamed: 0,town,temp,rain
0,Atina,35,False
1,Oslo,21,False
2,London,25,True


## Insert Columns

### Insert column into DF at specified location

Reference: https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.insert.html

In [6]:
# DataFrame.insert modifies the frame in place and raises ValueError when the
# column already exists, so check first to keep this cell safe to re-run.
if "wind" not in eu_weather_df.columns:
    # Position 3 puts "wind" right after the three existing columns.
    eu_weather_df.insert(3, "wind", [1.5, 7.5, 4])
eu_weather_df

Unnamed: 0,town,temp,rain,wind
0,Atina,35,False,1.5
1,Oslo,21,False,7.5
2,London,25,True,4.0


## Deleting columns

#### In place: with `del` or `pop()`

In [7]:
# `del` removes the column from the frame in place; the membership check
# keeps the cell re-runnable once "wind" is gone.
if "wind" in eu_weather_df.columns:
    del eu_weather_df["wind"]
    # Alternative: eu_weather_df.pop("wind") also removes it in place and
    # returns the removed column.

eu_weather_df

Unnamed: 0,town,temp,rain
0,Atina,35,False
1,Oslo,21,False
2,London,25,True


#### with DF.drop method

Reference: https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.drop.html

In [8]:
# The `columns` keyword names the column(s) to drop; the result is a new frame.
eu_weather_df.drop(columns=["temp", "rain"])

Unnamed: 0,town
0,Atina
1,Oslo
2,London


In [9]:
# Equivalent form: pass the list of labels and set axis=1 so they are
# looked up among the columns instead of the row index.
eu_weather_df.drop(labels=["temp", "rain"], axis=1)

Unnamed: 0,town
0,Atina
1,Oslo
2,London


In [10]:
# Note: most DataFrame operations return a new object instead of modifying the
# original. The drop() results above were never assigned back, so eu_weather_df
# still has all of its columns.
eu_weather_df

Unnamed: 0,town,temp,rain
0,Atina,35,False
1,Oslo,21,False
2,London,25,True


## Concatenate DataFrames along rows or columns (a particular axis)

Reference: https://pandas.pydata.org/docs/reference/api/pandas.concat.html

### Concatenate Rows

Using pd.concat() we can add rows from one DF into another. 

In [11]:
# Keep each frame's original index labels (the default), so 0..2 appears twice:
world_weather_df = pd.concat(
    [eu_weather_df, bg_weather_df],
)
world_weather_df

Unnamed: 0,town,temp,rain
0,Atina,35,False
1,Oslo,21,False
2,London,25,True
0,Sofia,25,False
1,Sandanski,32,False
2,Pleven,21,True


In [12]:
# ignore_index=True discards the original labels and renumbers the rows from 0:
world_weather_df = pd.concat(
    [eu_weather_df, bg_weather_df],
    ignore_index=True,
)
world_weather_df

Unnamed: 0,town,temp,rain
0,Atina,35,False
1,Oslo,21,False
2,London,25,True
3,Sofia,25,False
4,Sandanski,32,False
5,Pleven,21,True


In [13]:
# Tag the rows of each source frame with a key; the keys become the outer
# level of a two-level row index.
world_weather_df = pd.concat([eu_weather_df, bg_weather_df], keys=["EU", "BG"])
world_weather_df

Unnamed: 0,Unnamed: 1,town,temp,rain
EU,0,Atina,35,False
EU,1,Oslo,21,False
EU,2,London,25,True
BG,0,Sofia,25,False
BG,1,Sandanski,32,False
BG,2,Pleven,21,True


In [14]:
# Selecting an outer-level key with .loc returns the rows tagged with that key:
world_weather_df.loc["BG"]

Unnamed: 0,town,temp,rain
0,Sofia,25,False
1,Sandanski,32,False
2,Pleven,21,True


### Concatenate Columns

Using `pd.concat()` we can add columns from one DF into another.
Note that we must pass `axis=1` in order to concatenate columns rather than rows.

In [15]:
# Single-column frame of wind values with the default 0..2 index.
wind_df = pd.DataFrame({"wind": [3.4, 2, 6.5]})
wind_df

Unnamed: 0,wind
0,3.4
1,2.0
2,6.5


In [16]:
# axis=1 places the frames side by side, matching rows on their index labels,
# so wind_df's column is appended to bg_weather_df's columns.
new_bg_weather_df = pd.concat(
    [bg_weather_df, wind_df],
    axis=1,
)
new_bg_weather_df

Unnamed: 0,town,temp,rain,wind
0,Sofia,25,False,3.4
1,Sandanski,32,False,2.0
2,Pleven,21,True,6.5
