In [2]:
import numpy as np
import pandas as pd
import seaborn as sns

In [3]:
s = pd.Series([1,2,3,4],
                index=['a', 'b', 'c', 'd'])
s

a    1
b    2
c    3
d    4
dtype: int64

In [4]:
data = [
    {'name': 'naseff', 'age':23, "place":'tirur'},
    {'name': 'akash', 'age':24, 'place':'vadakara'}
]

df = pd.DataFrame(data)

In [5]:
df.shape

(2, 3)

In [6]:
df.drop('age', axis=1)

Unnamed: 0,name,place
0,naseff,tirur
1,akash,vadakara


In [7]:
tips = sns.load_dataset('tips')

In [8]:
tips.loc[2:4]

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
2,21.01,3.5,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4


In [9]:
tips.loc[::2]

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
2,21.01,3.50,Male,No,Sun,Dinner,3
4,24.59,3.61,Female,No,Sun,Dinner,4
6,8.77,2.00,Male,No,Sun,Dinner,2
8,15.04,1.96,Male,No,Sun,Dinner,2
...,...,...,...,...,...,...,...
234,15.53,3.00,Male,Yes,Sat,Dinner,2
236,12.60,1.00,Male,Yes,Sat,Dinner,2
238,35.83,4.67,Female,No,Sat,Dinner,3
240,27.18,2.00,Female,Yes,Sat,Dinner,2


In [10]:
df = pd.DataFrame({
    'Name': ['Alice', 'Bob', 'Amanda', 'Charlie', 'Anna', 'David', 'Andrew', 'Emily'],
    'Age': [25, 30, 28, 22, 35, 40, 32, 26],
    'City': ['Mumbai', 'Delhi', 'Bangalore', 'Chennai', 'Pune', 'Hyderabad', 'Kolkata', 'Ahmedabad'],
    'Salary': [50000, 60000, 55000, 45000, 70000, 80000, 65000, 48000]
})

df

Unnamed: 0,Name,Age,City,Salary
0,Alice,25,Mumbai,50000
1,Bob,30,Delhi,60000
2,Amanda,28,Bangalore,55000
3,Charlie,22,Chennai,45000
4,Anna,35,Pune,70000
5,David,40,Hyderabad,80000
6,Andrew,32,Kolkata,65000
7,Emily,26,Ahmedabad,48000


In [11]:
df[df['Name'].str.startswith('A')]

Unnamed: 0,Name,Age,City,Salary
0,Alice,25,Mumbai,50000
2,Amanda,28,Bangalore,55000
4,Anna,35,Pune,70000
6,Andrew,32,Kolkata,65000


In [12]:
df.set_index(df['Name'])

Unnamed: 0_level_0,Name,Age,City,Salary
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Alice,Alice,25,Mumbai,50000
Bob,Bob,30,Delhi,60000
Amanda,Amanda,28,Bangalore,55000
Charlie,Charlie,22,Chennai,45000
Anna,Anna,35,Pune,70000
David,David,40,Hyderabad,80000
Andrew,Andrew,32,Kolkata,65000
Emily,Emily,26,Ahmedabad,48000


In [13]:
sample_data = {
    'Name': ['Alice', np.nan, 'Charlie', np.nan, 'Eve'],
    'Age': [25, np.nan, np.nan, np.nan, 30],
    'City': ['Mumbai', np.nan, 'Chennai', np.nan, np.nan],
    'Score': [85, np.nan, 92, np.nan, 78]
}
df = pd.DataFrame(sample_data)
df

Unnamed: 0,Name,Age,City,Score
0,Alice,25.0,Mumbai,85.0
1,,,,
2,Charlie,,Chennai,92.0
3,,,,
4,Eve,30.0,,78.0


In [14]:
df.fillna('Unknown')

Unnamed: 0,Name,Age,City,Score
0,Alice,25.0,Mumbai,85.0
1,Unknown,Unknown,Unknown,Unknown
2,Charlie,Unknown,Chennai,92.0
3,Unknown,Unknown,Unknown,Unknown
4,Eve,30.0,Unknown,78.0


In [15]:
df.dropna(thresh=1)

Unnamed: 0,Name,Age,City,Score
0,Alice,25.0,Mumbai,85.0
2,Charlie,,Chennai,92.0
4,Eve,30.0,,78.0


In [16]:
df['Age'].fillna(df['Age'].median())

0    25.0
1    27.5
2    27.5
3    27.5
4    30.0
Name: Age, dtype: float64

In [17]:
df.isnull().sum()

Name     2
Age      3
City     3
Score    2
dtype: int64

In [18]:
df["Age"].interpolate()

0    25.00
1    26.25
2    27.50
3    28.75
4    30.00
Name: Age, dtype: float64

In [19]:
df['Age'].sort_values(ascending=False)

4    30.0
0    25.0
1     NaN
2     NaN
3     NaN
Name: Age, dtype: float64

In [20]:
df['Age'].min()

np.float64(25.0)

In [21]:
df['Age'].unique()

array([25., nan, 30.])

In [22]:
df[df["Age"] > 25]

Unnamed: 0,Name,Age,City,Score
4,Eve,30.0,,78.0


In [23]:
df

Unnamed: 0,Name,Age,City,Score
0,Alice,25.0,Mumbai,85.0
1,,,,
2,Charlie,,Chennai,92.0
3,,,,
4,Eve,30.0,,78.0


In [24]:
df1 = pd.DataFrame({
    'A': ['A0', 'A1', 'A2'],
    'B': ['B0', 'B1', 'B2'],
    'C': ['C0', 'C1', 'C2'],
    'D': ['D0', 'D1', 'D2']
}, index=[0, 1, 2])

df2 = pd.DataFrame({
    'A': ['A3', 'A4', 'A5'],
    'B': ['B3', 'B4', 'B5'],
    'C': ['C3', 'C4', 'C5'],
    'D': ['D3', 'D4', 'D5']
}, index=[3, 4, 5])

In [28]:
pd.concat((df1, df2), axis=1)

Unnamed: 0,A,B,C,D,A.1,B.1,C.1,D.1
0,A0,B0,C0,D0,,,,
1,A1,B1,C1,D1,,,,
2,A2,B2,C2,D2,,,,
3,,,,,A3,B3,C3,D3
4,,,,,A4,B4,C4,D4
5,,,,,A5,B5,C5,D5


In [29]:
left = pd.DataFrame({
    'key': ['K0', 'K1', 'K2', 'K3'],
    'A': ['A0', 'A1', 'A2', 'A3'],
    'B': ['B0', 'B1', 'B2', 'B3']
})

right = pd.DataFrame({
    'key': ['K0', 'K1', 'K2', 'K4'],
    'C': ['C0', 'C1', 'C2', 'C4'],
    'D': ['D0', 'D1', 'D2', 'D4']
})

In [34]:
pd.merge(left, right, on='key', how='inner')

Unnamed: 0,key,A,B,C,D
0,K0,A0,B0,C0,D0
1,K1,A1,B1,C1,D1
2,K2,A2,B2,C2,D2


In [36]:
left.join(right, how='right', lsuffix='l', rsuffix='r')

Unnamed: 0,keyl,A,B,keyr,C,D
0,K0,A0,B0,K0,C0,D0
1,K1,A1,B1,K1,C1,D1
2,K2,A2,B2,K2,C2,D2
3,K3,A3,B3,K4,C4,D4


In [37]:
df_melt = pd.DataFrame({
    'student': ['Alice', 'Bob', 'Charlie'],
    'Math': [85, 90, 78],
    'English': [88, 85, 82],
    'Science': [92, 87, 80]
})


In [38]:
pd.melt(df_melt, id_vars=['student'], value_vars=['Math', 'English', 'Science'], value_name='Subjects', var_name='Score')

Unnamed: 0,student,Score,Subjects
0,Alice,Math,85
1,Bob,Math,90
2,Charlie,Math,78
3,Alice,English,88
4,Bob,English,85
5,Charlie,English,82
6,Alice,Science,92
7,Bob,Science,87
8,Charlie,Science,80


In [39]:
df_pivot = pd.DataFrame({
    'student': ['Alice', 'Alice', 'Alice', 'Bob', 'Bob', 'Bob', 'Charlie', 'Charlie', 'Charlie'],
    'subject': ['Math', 'English', 'Science', 'Math', 'English', 'Science', 'Math', 'English', 'Science'],
    'score': [85, 88, 92, 90, 85, 87, 78, 82, 80]
})

In [40]:
pd.pivot(df_pivot, index="student", columns='subject', values='score')

subject,English,Math,Science
student,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Alice,88,85,92
Bob,85,90,87
Charlie,82,78,80


In [42]:
df_pivot.groupby('subject')['score'].mean()

subject
English    85.000000
Math       84.333333
Science    86.333333
Name: score, dtype: float64