# Pandas Series
1. A Series is a one-dimensional labeled array capable of holding any data type. The axis labels are collectively called the index.

In [96]:
import numpy as np
import pandas as pd

In [2]:
# Building blocks for the Series examples below.
labels = list("abc")                    # index labels
my_list = [1, 2, 3]                     # plain Python list
arr = np.asarray([10, 20, 30])          # NumPy array of values
d = dict(zip(my_list, [10, 20, 30]))    # mapping of key -> value

In [3]:
# Build a Series from the NumPy array, using the strings in `labels` as its index.
pd.Series(data=arr, index=labels)

a    10
b    20
c    30
dtype: int64

In [4]:
# Build a Series from a dict: the keys become the index, the values the data.
pd.Series(data=d)

1    10
2    20
3    30
dtype: int64

In [5]:
# Column-oriented records: each key is a column name, each list holds one value per person.
data = dict(
    Name=['Amit', 'Brijesh', 'Ashish', 'Ayush'],
    Age=[20, 25, 23, 21],
    City=['Lunva', 'Lunva', 'Dahod', 'Mehsana'],
    salary=[51000, 30000, 40000, 45000],
)

In [6]:
# Turn the dict of equal-length lists into a DataFrame (one column per key) and display it.
df = pd.DataFrame(data=data)
df

Unnamed: 0,Name,Age,City,salary
0,Amit,20,Lunva,51000
1,Brijesh,25,Lunva,30000
2,Ashish,23,Dahod,40000
3,Ayush,21,Mehsana,45000


In [7]:
# Transpose the column-oriented dict into row tuples: (Name, Age, City, salary) per person.
data_list = list(zip(*(data[key] for key in ('Name', 'Age', 'City', 'salary'))))
data_list

[('Amit', 20, 'Lunva', 51000),
 ('Brijesh', 25, 'Lunva', 30000),
 ('Ashish', 23, 'Dahod', 40000),
 ('Ayush', 21, 'Mehsana', 45000)]

In [8]:
# Build a DataFrame from the row tuples, naming the columns explicitly.
# Note the label here is 'Salary' (capital S), unlike the 'salary' key in `data`.
column = ['Name', 'Age', 'City', 'Salary']
df2 = pd.DataFrame(data=data_list, columns=column)
df2

Unnamed: 0,Name,Age,City,Salary
0,Amit,20,Lunva,51000
1,Brijesh,25,Lunva,30000
2,Ashish,23,Dahod,40000
3,Ayush,21,Mehsana,45000


In [9]:
# Select a subset of columns (all rows); passing a list of labels returns a DataFrame.
df2.loc[:, ['Name', 'City']]

Unnamed: 0,Name,City
0,Amit,Lunva
1,Brijesh,Lunva
2,Ashish,Dahod
3,Ayush,Mehsana


In [10]:
# New Column
# Assigning a list to a new key adds it as a column of df2; the list supplies
# one value per existing row (four here).
df2['Designation'] = ['Eng.','BSF','Lawyer','Eng.']
df2

Unnamed: 0,Name,Age,City,Salary,Designation
0,Amit,20,Lunva,51000,Eng.
1,Brijesh,25,Lunva,30000,BSF
2,Ashish,23,Dahod,40000,Lawyer
3,Ayush,21,Mehsana,45000,Eng.


In [11]:
# Remove the 'Designation' column. drop() returns a new frame and leaves the
# original untouched, so the result is bound back to df2 instead of using
# inplace=True. errors='ignore' keeps this cell safe to re-run once the column
# is already gone (a plain drop would raise KeyError on the second run).
df2 = df2.drop(columns='Designation', errors='ignore')

In [12]:
# Confirm that 'Designation' is no longer among df2's columns.
df2

Unnamed: 0,Name,Age,City,Salary
0,Amit,20,Lunva,51000
1,Brijesh,25,Lunva,30000
2,Ashish,23,Dahod,40000
3,Ayush,21,Mehsana,45000


In [13]:
# Drop the row whose index label is 1. This returns a new frame; df2 itself is unchanged.
df2.drop(index=1)

Unnamed: 0,Name,Age,City,Salary
0,Amit,20,Lunva,51000
2,Ashish,23,Dahod,40000
3,Ayush,21,Mehsana,45000


In [14]:
# Selecting rows

# Label-based selection: pick the rows whose index labels are 0 and 3, all columns.
df2.loc[[0, 3], :]

Unnamed: 0,Name,Age,City,Salary
0,Amit,20,Lunva,51000
3,Ayush,21,Mehsana,45000


In [15]:
# Position-based selection: the third row (position 2), returned as a Series.
df2.iloc[2, :]

Name      Ashish
Age           23
City       Dahod
Salary     40000
Name: 2, dtype: object

In [16]:
# Selecting subset of Rows and Columns

# A single .loc[rows, columns] call selects both axes at once, instead of
# chaining two indexing operations (df.loc[rows][cols]), which builds an
# intermediate frame first.
df2.loc[[0, 1], ["City", "Salary"]]

Unnamed: 0,City,Salary
0,Lunva,51000
1,Lunva,30000


In [17]:
# Rows labelled 2 and 3, columns 'Name' and 'Age', selected in one .loc call
# rather than by chained indexing.
df2.loc[[2, 3], ['Name', 'Age']]

Unnamed: 0,Name,Age
2,Ashish,23
3,Ayush,21


Conditional Selection

In [26]:
# Show the full frame that the conditional filter in the next cell is applied to.
df2

Unnamed: 0,Name,Age,City,Salary
0,Amit,20,Lunva,51000
1,Brijesh,25,Lunva,30000
2,Ashish,23,Dahod,40000
3,Ayush,21,Mehsana,45000


In [34]:
# Keep only the rows where Age is above 20 AND City is "Lunva".
# The two boolean Series are combined element-wise with &.
df2.loc[df2["Age"].gt(20) & df2["City"].eq("Lunva")]

Unnamed: 0,Name,Age,City,Salary
1,Brijesh,25,Lunva,30000


# Missing Data

In [65]:
# Small frame with deliberately missing values (np.nan) in columns A, C and D;
# column B is complete.
data = dict(
    A=[1, 2, np.nan, 4, 5],
    B=[1, 2, 3, 4, 5],
    C=[1, 2, 3, np.nan, np.nan],
    D=[1, np.nan, np.nan, np.nan, 5],
)
df = pd.DataFrame(data=data)

In [66]:
# Display the frame; the empty entries are the np.nan values supplied when it was built.
df

Unnamed: 0,A,B,C,D
0,1.0,1,1.0,1.0
1,2.0,2,2.0,
2,,3,3.0,
3,4.0,4,,
4,5.0,5,,5.0


In [67]:
# Count the missing values in each column (sum of the boolean NaN mask down the rows).
df.isna().sum(axis=0)

A    1
B    0
C    2
D    3
dtype: int64

In [68]:
# For each column, report whether it contains at least one missing value.
df.isna().any(axis=0)

A     True
B    False
C     True
D     True
dtype: bool

In [71]:
# Drop every row that has any missing value (the defaults, spelled out).
# Returns a new frame; df is not modified.
df.dropna(axis=0, how="any")

Unnamed: 0,A,B,C,D
0,1.0,1,1.0,1.0


In [79]:
# Keep only the rows that have at least 3 non-missing values.
df.dropna(axis=0, thresh=3)

Unnamed: 0,A,B,C,D
0,1.0,1,1.0,1.0
1,2.0,2,2.0,
4,5.0,5,,5.0


In [84]:
# Replace every missing value with the same scalar (1). Returns a new frame.
df.fillna(value=1)

Unnamed: 0,A,B,C,D
0,1.0,1,1.0,1.0
1,2.0,2,2.0,1.0
2,1.0,3,3.0,1.0
3,4.0,4,1.0,1.0
4,5.0,5,1.0,5.0


In [85]:
# Per-column fill values: each column's NaNs are replaced by the value under its name.
# Column B has no missing entries, so its value (200) is never used.
values = dict(A=100, B=200, C=300, D=400)
df.fillna(values)

Unnamed: 0,A,B,C,D
0,1.0,1,1.0,1.0
1,2.0,2,2.0,400.0
2,100.0,3,3.0,400.0
3,4.0,4,300.0,400.0
4,5.0,5,300.0,5.0


In [86]:
# Fill each column's missing values with that column's mean
# (df.mean skips NaN when averaging).
df.fillna(value=df.mean(axis=0))

Unnamed: 0,A,B,C,D
0,1.0,1,1.0,1.0
1,2.0,2,2.0,3.0
2,3.0,3,3.0,3.0
3,4.0,4,2.0,3.0
4,5.0,5,2.0,5.0


# Merging, Joining and Concatenation

In [97]:
# DataFrame 1: employee master data, one row per employee.
employees = pd.DataFrame(
    [
        (1, 'John', 'HR'),
        (2, 'Anna', 'IT'),
        (3, 'Peter', 'Finance'),
        (4, 'Linda', 'IT'),
        (5, 'Bob', 'HR'),
    ],
    columns=['employee_id', 'name', 'department'],
)

# DataFrame 2: salary information. IDs 6 and 7 have no row in `employees`,
# and IDs 4 and 5 have no row here, so the join types give different results.
salaries = pd.DataFrame(
    [
        (1, 60000, 5000),
        (2, 80000, 10000),
        (3, 65000, 7000),
        (6, 70000, 8000),
        (7, 90000, 12000),
    ],
    columns=['employee_id', 'salary', 'bonus'],
)

In [98]:
# Left-hand table for the merge examples: one row per employee (IDs 1-5).
employees

Unnamed: 0,employee_id,name,department
0,1,John,HR
1,2,Anna,IT
2,3,Peter,Finance
3,4,Linda,IT
4,5,Bob,HR


In [99]:
# Right-hand table for the merge examples: salary and bonus keyed by employee_id (IDs 1, 2, 3, 6, 7).
salaries

Unnamed: 0,employee_id,salary,bonus
0,1,60000,5000
1,2,80000,10000
2,3,65000,7000
3,6,70000,8000
4,7,90000,12000


In [105]:
# Outer merge: keep every employee_id found in either table; unmatched cells become NaN.
employees.merge(salaries, on='employee_id', how='outer')

Unnamed: 0,employee_id,name,department,salary,bonus
0,1,John,HR,60000.0,5000.0
1,2,Anna,IT,80000.0,10000.0
2,3,Peter,Finance,65000.0,7000.0
3,4,Linda,IT,,
4,5,Bob,HR,,
5,6,,,70000.0,8000.0
6,7,,,90000.0,12000.0


In [106]:
# Left merge: keep every row of `employees`; salary/bonus are NaN where no match exists.
employees.merge(salaries, on='employee_id', how='left')

Unnamed: 0,employee_id,name,department,salary,bonus
0,1,John,HR,60000.0,5000.0
1,2,Anna,IT,80000.0,10000.0
2,3,Peter,Finance,65000.0,7000.0
3,4,Linda,IT,,
4,5,Bob,HR,,


In [107]:
# Right merge: keep every row of `salaries`; name/department are NaN where no match exists.
employees.merge(salaries, on='employee_id', how='right')

Unnamed: 0,employee_id,name,department,salary,bonus
0,1,John,HR,60000,5000
1,2,Anna,IT,80000,10000
2,3,Peter,Finance,65000,7000
3,6,,,70000,8000
4,7,,,90000,12000


In [111]:
# Concatenation

# Two frames with identical columns (A, B, C). Each cell is the column letter
# followed by a running number: 0-2 in df1, 3-5 in df2.
df1 = pd.DataFrame({col: [f'{col}{n}' for n in range(3)] for col in 'ABC'})

df2 = pd.DataFrame({col: [f'{col}{n}' for n in range(3, 6)] for col in 'ABC'})

In [112]:
# Preview the first of the two frames used in the concatenation examples.
df1

Unnamed: 0,A,B,C
0,A0,B0,C0
1,A1,B1,C1
2,A2,B2,C2


In [114]:
# Stack df2 underneath df1 (row-wise). The original index labels are kept,
# so 0, 1, 2 appear twice in the result.
pd.concat([df1, df2], axis=0)

Unnamed: 0,A,B,C
0,A0,B0,C0
1,A1,B1,C1
2,A2,B2,C2
0,A3,B3,C3
1,A4,B4,C4
2,A5,B5,C5


In [117]:
# Place df2 to the right of df1 (column-wise), aligning rows on their index labels.
pd.concat([df1, df2], axis="columns")

Unnamed: 0,A,B,C,A.1,B.1,C.1
0,A0,B0,C0,A3,B3,C3
1,A1,B1,C1,A4,B4,C4
2,A2,B2,C2,A5,B5,C5


**Joining 2 Data Frames**

In [118]:
# First DataFrame: names, indexed 1-3.
df1 = pd.DataFrame(['Alice', 'Bob', 'Charlie'], index=[1, 2, 3], columns=['name'])

# Second DataFrame: scores, indexed 2-4, so only labels 2 and 3 overlap with df1.
df2 = pd.DataFrame([85, 90, 75], index=[2, 3, 4], columns=['score'])

In [120]:
# Left frame for the join examples: names indexed 1-3.
df1

Unnamed: 0,name
1,Alice
2,Bob
3,Charlie


In [121]:
# Right frame for the join examples: scores indexed 2-4 (overlaps df1 only on labels 2 and 3).
df2

Unnamed: 0,score
2,85
3,90
4,75


In [122]:
# Index-based join, left by default: keep every row of df1 and attach the matching score.
# Label 1 has no entry in df2, so its score is NaN.
df1.join(df2, how="left")

Unnamed: 0,name,score
1,Alice,
2,Bob,85.0
3,Charlie,90.0


In [128]:
# Outer join on the index: keep the union of both frames' labels (1-4),
# filling the gaps with NaN.
df1.join(other=df2, how="outer")

Unnamed: 0,name,score
1,Alice,
2,Bob,85.0
3,Charlie,90.0
4,,75.0


In [129]:
# Same join with the roles swapped: keep every row of df2 and attach the matching name.
# Label 4 has no entry in df1, so its name is NaN.
df2.join(df1, how="left")

Unnamed: 0,score,name
2,85,Bob
3,90,Charlie
4,75,
