# Pandas

---
![pandas](./img/pandas.jpg)
### A powerful Python library for data manipulation and analysis, providing easy-to-use data structures like DataFrame and Series, ideal for handling structured data and performing complex operations efficiently.

In [1]:
# Importing Pandas library
import pandas as pd

# Build a one-dimensional Series from a plain Python list;
# pandas attaches a default integer index (0, 1, 2, ...) to the values.
data = list(range(1, 6))
series = pd.Series(data=data)
series


0    1
1    2
2    3
3    4
4    5
dtype: int64

In [2]:
# Build a DataFrame from a mapping of column name -> list of values.
# Every list has the same length, and each list becomes one column.
data_dict = dict(
    Name=['Alice', 'Bob', 'Charlie', 'David', 'Eva'],
    Age=[25, 30, 35, 40, 45],
    City=['New York', 'London', 'Paris', 'Tokyo', 'Sydney'],
)
data_frame = pd.DataFrame(data=data_dict)
data_frame

Unnamed: 0,Name,Age,City
0,Alice,25,New York
1,Bob,30,London
2,Charlie,35,Paris
3,David,40,Tokyo
4,Eva,45,Sydney


In [3]:
# Select single columns by their label; each selection comes back as a Series
names, ages = data_frame['Name'], data_frame['Age']
# Print both Series, one after the other
print(names, ages, sep='\n')


0      Alice
1        Bob
2    Charlie
3      David
4        Eva
Name: Name, dtype: object
0    25
1    30
2    35
3    40
4    45
Name: Age, dtype: int64


In [17]:
# Load the Iris CSV file (path is relative to the notebook's working directory)
# into a DataFrame that the following cells work on.
df = pd.read_csv(filepath_or_buffer='./data/Iris.csv')

# Let's look at some basic DataFrame operations

In [18]:
df.shape  # Dimensions of the DataFrame as a (rows, columns) tuple

(150, 6)

In [20]:
df.info()  # Prints the index range, each column's non-null count and dtype, and the memory usage

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 6 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Id             150 non-null    int64  
 1   SepalLengthCm  150 non-null    float64
 2   SepalWidthCm   150 non-null    float64
 3   PetalLengthCm  150 non-null    float64
 4   PetalWidthCm   150 non-null    float64
 5   Species        150 non-null    object 
dtypes: float64(4), int64(1), object(1)
memory usage: 7.2+ KB


In [21]:
df.describe()  # Summary statistics (count, mean, std, min, quartiles, max) of the numerical columns

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm
count,150.0,150.0,150.0,150.0,150.0
mean,75.5,5.843333,3.054,3.758667,1.198667
std,43.445368,0.828066,0.433594,1.76442,0.763161
min,1.0,4.3,2.0,1.0,0.1
25%,38.25,5.1,2.8,1.6,0.3
50%,75.5,5.8,3.0,4.35,1.3
75%,112.75,6.4,3.3,5.1,1.8
max,150.0,7.9,4.4,6.9,2.5


In [47]:
# Row filtering with a boolean mask: keep the rows whose sepal length is
# greater than 6.5 and at most 6.8, i.e. inside the interval (6.5, 6.8].
sepal_length = df['SepalLengthCm']
filtered = df.loc[sepal_length.gt(6.5) & sepal_length.le(6.8)]
filtered

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
58,59,6.6,2.9,4.6,1.3,Iris-versicolor
65,66,6.7,3.1,4.4,1.4,Iris-versicolor
75,76,6.6,3.0,4.4,1.4,Iris-versicolor
76,77,6.8,2.8,4.8,1.4,Iris-versicolor
77,78,6.7,3.0,5.0,1.7,Iris-versicolor
86,87,6.7,3.1,4.7,1.5,Iris-versicolor
108,109,6.7,2.5,5.8,1.8,Iris-virginica
112,113,6.8,3.0,5.5,2.1,Iris-virginica
124,125,6.7,3.3,5.7,2.1,Iris-virginica
140,141,6.7,3.1,5.6,2.4,Iris-virginica


In [42]:
# Filtering on two different columns at once: sepal longer than 7 cm AND petal
# shorter than 6 cm. Each condition is wrapped in its own parentheses because
# `&` binds more tightly than the comparison operators.
# The result gets its own name instead of reusing `filtered`, so the earlier
# selection is not overwritten and the notebook produces the same results when
# its cells are run from top to bottom.
long_sepal_short_petal = df[(df['SepalLengthCm'] > 7) & (df['PetalLengthCm'] < 6)]
long_sepal_short_petal

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
102,103,7.1,3.0,5.9,2.1,Iris-virginica
129,130,7.2,3.0,5.8,1.6,Iris-virginica


In [49]:
# Order the rows of `filtered` by sepal width, smallest value first
# (ascending order is what sort_values does by default).
# NOTE: the name `sorted` hides Python's built-in sorted() for the rest of the
# session; it is kept because a later cell refers to this variable.
sorted = filtered.sort_values('SepalWidthCm')
sorted

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
108,109,6.7,2.5,5.8,1.8,Iris-virginica
76,77,6.8,2.8,4.8,1.4,Iris-versicolor
58,59,6.6,2.9,4.6,1.3,Iris-versicolor
75,76,6.6,3.0,4.4,1.4,Iris-versicolor
77,78,6.7,3.0,5.0,1.7,Iris-versicolor
112,113,6.8,3.0,5.5,2.1,Iris-virginica
145,146,6.7,3.0,5.2,2.3,Iris-virginica
65,66,6.7,3.1,4.4,1.4,Iris-versicolor
86,87,6.7,3.1,4.7,1.5,Iris-versicolor
140,141,6.7,3.1,5.6,2.4,Iris-virginica


In [57]:
# Add a boolean column to df: True for rows whose sepal length exceeds 7,
# False otherwise.
df['Long'] = df['SepalLengthCm'].gt(7)
df

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species,Long
0,1,5.1,3.5,1.4,0.2,Iris-setosa,False
1,2,4.9,3.0,1.4,0.2,Iris-setosa,False
2,3,4.7,3.2,1.3,0.2,Iris-setosa,False
3,4,4.6,3.1,1.5,0.2,Iris-setosa,False
4,5,5.0,3.6,1.4,0.2,Iris-setosa,False
...,...,...,...,...,...,...,...
145,146,6.7,3.0,5.2,2.3,Iris-virginica,False
146,147,6.3,2.5,5.0,1.9,Iris-virginica,False
147,148,6.5,3.0,5.2,2.0,Iris-virginica,False
148,149,6.2,3.4,5.4,2.3,Iris-virginica,False


In [58]:
# Derive two new columns by element-wise multiplication of existing ones:
# sepal length x petal length, and sepal width x petal width.
df["SepalxPetal_Length"] = df['SepalLengthCm'].mul(df['PetalLengthCm'])
df["SepalxPetal_Width"] = df['SepalWidthCm'].mul(df['PetalWidthCm'])
df

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species,Long,SepalxPetal_Length,SepalxPetal_Width
0,1,5.1,3.5,1.4,0.2,Iris-setosa,False,7.14,0.70
1,2,4.9,3.0,1.4,0.2,Iris-setosa,False,6.86,0.60
2,3,4.7,3.2,1.3,0.2,Iris-setosa,False,6.11,0.64
3,4,4.6,3.1,1.5,0.2,Iris-setosa,False,6.90,0.62
4,5,5.0,3.6,1.4,0.2,Iris-setosa,False,7.00,0.72
...,...,...,...,...,...,...,...,...,...
145,146,6.7,3.0,5.2,2.3,Iris-virginica,False,34.84,6.90
146,147,6.3,2.5,5.0,1.9,Iris-virginica,False,31.50,4.75
147,148,6.5,3.0,5.2,2.0,Iris-virginica,False,33.80,6.00
148,149,6.2,3.4,5.4,2.3,Iris-virginica,False,33.48,7.82


In [60]:
# Write both DataFrames to CSV files; index=False leaves the row index out.
# `sorted` here is the DataFrame assigned in the sorting cell, not the built-in.
df.to_csv(path_or_buf='output.csv', index=False)
sorted.to_csv(path_or_buf='filtered_and_sorted.csv', index=False)