In [1]:
pip install pandas


Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.0 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


#### A pandas DataFrame is a powerful tool that allows us to store and manipulate data in a structured way.

In [12]:
import numpy as np
import pandas as pd

# Keep the rows as plain Python lists instead of wrapping them in np.array.
# A NumPy array holds a single dtype, so mixing names and ages turns every
# value (Age included) into a string. Passing the rows straight to pandas lets
# it infer a dtype per column, so Age stays numeric.
# The duplicated "john" row and the empty name are part of the sample data.
data = [
    ["john", 20, "Male"],
    ["john", 20, "Male"],
    ["", 22, "Female"],
    ["hello", 22, "Female"],
    ["Henry", 25, "Male"],
    ["Smith", 30, "Male"],
    ["Susan", 27, "Female"],
]
df = pd.DataFrame(data, columns=['Name', 'Age', 'Gender'])


In [13]:
# Inspect the row labels. No index was passed when the frame was built,
# so pandas supplies a default integer RangeIndex.
df.index

RangeIndex(start=0, stop=7, step=1)

In [14]:
# Preview the first five rows (five is head()'s default, spelled out here).
print(df.head(n=5))

    Name Age  Gender
0   john  20    Male
1   john  20    Male
2         22  Female
3  hello  22  Female
4  Henry  25    Male


In [15]:
# Preview the last five rows (five is tail()'s default, spelled out here).
print(df.tail(n=5))

    Name Age  Gender
2         22  Female
3  hello  22  Female
4  Henry  25    Male
5  Smith  30    Male
6  Susan  27  Female


## Difference Between Series and DataFrame in Pandas

In **Pandas**, both `Series` and `DataFrame` are data structures used for data manipulation.

### Series
- A **Series** is a one-dimensional labeled array capable of holding any data type (integers, strings, floating point numbers, Python objects, etc.).
- It is similar to a column in a spreadsheet or a database table.

### DataFrame
- A **DataFrame** is a two-dimensional labeled data structure with columns of potentially different types.
- It is similar to a table in a database or an Excel spreadsheet.

In [18]:
import pandas as pd

# A Series is one labelled column: four integers carried under the name "Numbers".
values = [10, 20, 30, 40]
series = pd.Series(data=values, name="Numbers")
print(series)
print("\nType of series:", type(series))

0    10
1    20
2    30
3    40
Name: Numbers, dtype: int64

Type of series: <class 'pandas.core.series.Series'>


In [64]:
# Create a DataFrame from a column-oriented mapping: each key becomes a
# column name and each list supplies that column's values.
names = ["Alice", "Bob", "Charlie", "David"]
ages = [25, 30, 35, 40]
data = {"Name": names, "Age": ages}
df = pd.DataFrame(data=data)
print(df)
print("\nType of DataFrame:", type(df))

      Name  Age
0    Alice   25
1      Bob   30
2  Charlie   35
3    David   40

Type of DataFrame: <class 'pandas.core.frame.DataFrame'>


In [47]:
# Row labels of the frame; with no explicit index this is a default RangeIndex.
df.index

RangeIndex(start=0, stop=4, step=1)

In [28]:
# .values exposes the index labels as a plain array of the underlying values.
print(df.index.values)

[0 1 2 3]


In [54]:
# Promote the "Name" column to be the row index; it stops being a regular
# column, so the printed frame shows only "Age" under the "Name" labels.
df = df.set_index(keys="Name")
print(df)

         Age
Name        
Alice     25
Bob       30
Charlie   35
David     40


In [30]:
# With "Name" as the index, the labels are the names themselves (object dtype).
df.index.values

array(['Alice', 'Bob', 'Charlie', 'David'], dtype=object)

In [55]:
import pandas as pd
# At this point df uses "Name" as its index (set by the earlier set_index cell).
# Print it first, then move the index back into an ordinary column in place.
print(df)
df.reset_index(inplace=True)
print("\n After reset_index() with inplace=True:")
print(df)


         Age
Name        
Alice     25
Bob       30
Charlie   35
David     40

 After reset_index() with inplace=True:
      Name  Age
0    Alice   25
1      Bob   30
2  Charlie   35
3    David   40


In [65]:
# Swap the integer row labels for the custom string labels 'a' through 'd'.
df.index = list("abcd")
print(df)


      Name  Age
a    Alice   25
b      Bob   30
c  Charlie   35
d    David   40


In [66]:
# Reset index: go back to the default 0..n-1 labels. drop=True discards the
# old labels instead of inserting them as a new column.
# Rebinding the result (rather than inplace=True) keeps the step explicit and
# chainable; the printed frame is the same.
df = df.reset_index(drop=True)
print(df)

      Name  Age
0    Alice   25
1      Bob   30
2  Charlie   35
3    David   40


In [67]:
# Get index as list: materialise the row labels as a plain Python list.
index_list = list(df.index)
print(index_list)

[0, 1, 2, 3]


In [71]:
# Selecting with a list of column names (double brackets) returns a
# DataFrame, even when the list holds a single column.
name_frame = df[["Name"]]
type(name_frame)

pandas.core.frame.DataFrame

In [72]:
# Fetch the row labelled 1 (all columns); the result is a Series whose
# index is the column names.
row = df.loc[1, :]
print(row)

Name    Bob
Age      30
Name: 1, dtype: object


In [75]:
# Print every row as a Series, one label at a time.
# Iterating over df.index instead of a hard-coded range(4) keeps the loop
# correct when the frame has a different number of rows or non 0..3 labels.
for label in df.index:
    row = df.loc[label]
    print("\n")
    print(row)



Name    Alice
Age        25
Name: 0, dtype: object


Name    Bob
Age      30
Name: 1, dtype: object


Name    Charlie
Age          35
Name: 2, dtype: object


Name    David
Age        40
Name: 3, dtype: object


In [77]:
# Take all rows (a step of 1 is the same as a plain ':' slice) and the
# "Name" and "Age" columns.
selected_columns = ["Name", "Age"]
rows = df.loc[:, selected_columns]
print(rows)

      Name  Age
0    Alice   25
1      Bob   30
2  Charlie   35
3    David   40


In [83]:
# Relabel the rows 1..4, then pick every second label from 2 through 4.
# With .loc the stop label (4) is included in the slice.
df.index = list(range(1, 5))
every_second_label = slice(2, 4, 2)
rows = df.loc[every_second_label, ["Name"]]
print(rows)

    Name
2    Bob
4  David


In [107]:
# Build the frame from plain Python rows rather than np.array: a NumPy array
# has one dtype, so mixed rows would be coerced to strings and Age would need
# casting before every numeric comparison. Letting pandas infer per-column
# dtypes keeps Age numeric (existing .astype(int) calls remain valid).
data = [
    ["John", 20, "Male"],
    ["Kim", 23, "Female"],
    ["Henry", 25, "Male"],
    ["Smith", 30, "Male"],
    ["Hello", 40, "World"],
]
df = pd.DataFrame(data, columns=['Name', 'Age', 'Gender'], index=["A", "B", "C", "D", "E"])
print(df)

    Name Age  Gender
A   John  20    Male
B    Kim  23  Female
C  Henry  25    Male
D  Smith  30    Male
E  Hello  40   World


In [89]:
# Every second row label from "A" through "D" (stop label included),
# restricted to the Name and Age columns.
wanted_columns = ['Name', 'Age']
df.loc[slice("A", "D", 2), wanted_columns]

Unnamed: 0,Name,Age
A,John,20
C,Henry,25


In [92]:
# Pick specific rows and columns by passing explicit label lists to .loc.
row_labels = ["A", "B"]
column_labels = ['Name', 'Age']
df.loc[row_labels, column_labels]

Unnamed: 0,Name,Age
A,John,20
B,Kim,22


In [108]:
# Rows that satisfy BOTH conditions: age above 22 and gender "Male".
older_than_22 = df["Age"].astype(int) > 22
is_male = df["Gender"].astype(str) == "Male"
df[older_than_22 & is_male]

Unnamed: 0,Name,Age,Gender
C,Henry,25,Male
D,Smith,30,Male


In [109]:
# Rows that satisfy EITHER condition: age above 22 or gender "Male".
older_than_22 = df["Age"].astype(int) > 22
is_male = df["Gender"].astype(str) == "Male"
df[older_than_22 | is_male]

Unnamed: 0,Name,Age,Gender
A,John,20,Male
B,Kim,23,Female
C,Henry,25,Male
D,Smith,30,Male
E,Hello,40,World


In [103]:
# Overwrite Age with 35 for rows that are both older than 22 and "Male".
# This modifies df in place, so cells run afterwards see the updated ages.
older_than_22 = df["Age"].astype(int) > 22
is_male = df["Gender"].astype(str) == "Male"
df.loc[older_than_22 & is_male, "Age"] = 35
print(df)

    Name Age  Gender
A   John  20    Male
B    Kim  22  Female
C  Henry  35    Male
D  Smith  35    Male
E  Hello  40   World


In [110]:
# Keep only the rows whose age is above 22.
older_than_22 = df["Age"].astype(int) > 22
df[older_than_22]

Unnamed: 0,Name,Age,Gender
B,Kim,23,Female
C,Henry,25,Male
D,Smith,30,Male
E,Hello,40,World


In [111]:
# Small inventory table: one list per column, combined into a mapping.
# NOTE(review): 'qunatity' looks like a typo for 'quantity'; the key is left
# unchanged because the recorded outputs use this column name.
item_names = ["Laptop", "Mouse", "Book", "Pencil"]
quantities = [5, 10, 15, 20]
unit_prices = [700, 20, 10, 2]
dicts = {'items': item_names, 'qunatity': quantities, 'unit price': unit_prices}
df = pd.DataFrame(data=dicts)
print(df)

    items  qunatity  unit price
0  Laptop         5         700
1   Mouse        10          20
2    Book        15          10
3  Pencil        20           2


In [118]:
# Rebuild the inventory table and a second single-column frame of colours
# that shares the same 0..3 row labels.
# NOTE(review): 'qunatity' looks like a typo for 'quantity'; the key is left
# unchanged because the recorded outputs use this column name.
item_names = ["Laptop", "Mouse", "Book", "Pencil"]
quantities = [5, 10, 15, 20]
unit_prices = [700, 20, 10, 2]
dicts = {'items': item_names, 'qunatity': quantities, 'unit price': unit_prices}
dicts2 = {'colors': ["Red", "Blue", "Yellow", "Green"]}

df = pd.DataFrame(data=dicts)
df2 = pd.DataFrame(data=dicts2)

print(df)
print(df2)
# info() prints column names, non-null counts, dtypes and memory usage.
df.info()



    items  qunatity  unit price
0  Laptop         5         700
1   Mouse        10          20
2    Book        15          10
3  Pencil        20           2
   colors
0     Red
1    Blue
2  Yellow
3   Green
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4 entries, 0 to 3
Data columns (total 3 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   items       4 non-null      object
 1   qunatity    4 non-null      int64 
 2   unit price  4 non-null      int64 
dtypes: int64(2), object(1)
memory usage: 228.0+ bytes


In [121]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric
# columns; the text column "items" is not included in the result.
df.describe()

Unnamed: 0,qunatity,unit price
count,4.0,4.0
mean,12.5,183.0
std,6.454972,344.745317
min,5.0,2.0
25%,8.75,8.0
50%,12.5,15.0
75%,16.25,190.0
max,20.0,700.0


In [123]:
# Join the two frames side by side: concatenating along the column axis
# lines rows up by their shared index labels.
frames = [df, df2]
res = pd.concat(objs=frames, axis="columns")
res

Unnamed: 0,items,qunatity,unit price,colors
0,Laptop,5,700,Red
1,Mouse,10,20,Blue
2,Book,15,10,Yellow
3,Pencil,20,2,Green
