# Pandas

In [1]:
import pandas as pd  # Importing the pandas module
import numpy as np  # Importing the numpy module

# Build 's1' from a plain list; with no index given, pandas numbers the rows 0..n-1
s1 = pd.Series([10, 11, 12, 13, 14])
print(f"s1: \n{s1}")  # Show 's1'

print("------------------------")

# Build 's2' from a list, labelling each value with an explicit string index
s2 = pd.Series([1, 2, 3], index=["first", "second", "third"])
print(f"s2: \n{s2}")  # Show 's2'

print("------------------------")

# Build 's3' from a dictionary: the keys become the index, the values become the data
s3 = pd.Series({"a": 3.0, "b": 4.0, "c": 5.0})
print(f"s3: \n{s3}")  # Show 's3'

print("------------------------")

# Build 's4' by repeating one scalar for every position of the given index, and name the Series
s4 = pd.Series(10, index=range(10), name="Series 4")
print(f"s4: \n{s4}")  # Show 's4'

s1: 
0    10
1    11
2    12
3    13
4    14
dtype: int64
------------------------
s2: 
first     1
second    2
third     3
dtype: int64
------------------------
s3: 
a    3.0
b    4.0
c    5.0
dtype: float64
------------------------
s4: 
0    10
1    10
2    10
3    10
4    10
5    10
6    10
7    10
8    10
9    10
Name: Series 4, dtype: int64


In [2]:
def description(s_index, s):
    """Print a short summary of a pandas Series.

    Parameters
    ----------
    s_index : label used in the heading line (printed as ``s<s_index>``).
    s : the Series to describe; its ``size``, ``shape``, ``dtype`` and
        ``index`` attributes are read.

    Returns
    -------
    None. The summary is written to standard output.
    """
    # One adjacent f-string per reported attribute; each line except the last
    # ends with " \n", matching the layout of the printed report.
    report = (
        f"s{s_index}: \n"
        f"size = {s.size} \n"
        f"shape = {s.shape} \n"
        f"dtype = {s.dtype} \n"
        f"index = {s.index}"
    )
    print(report)

# Describe s1..s4 in order, printing a separator line between consecutive reports
for number, series in enumerate((s1, s2, s3, s4), start=1):
    if number > 1:
        print("------------------------")
    description(number, series)

s1: 
size = 5 
shape = (5,) 
dtype = int64 
index = RangeIndex(start=0, stop=5, step=1)
------------------------
s2: 
size = 3 
shape = (3,) 
dtype = int64 
index = Index(['first', 'second', 'third'], dtype='object')
------------------------
s3: 
size = 3 
shape = (3,) 
dtype = float64 
index = Index(['a', 'b', 'c'], dtype='object')
------------------------
s4: 
size = 10 
shape = (10,) 
dtype = int64 
index = RangeIndex(start=0, stop=10, step=1)


In [3]:
# Overwrite the value stored under label "b"; s3 itself is modified in place
s3.loc["b"] = 9

# Show s3 after the update
print(f"Updated s3: \n{s3}")

Updated s3: 
a    3.0
b    9.0
c    5.0
dtype: float64


In [4]:
# Passing a list of labels picks exactly those entries ("a" and "c") from s3
s3_subset = s3.loc[["a", "c"]]

# Show the two selected entries
print(s3_subset)

a    3.0
c    5.0
dtype: float64


In [6]:
# Label-based slice from "a" through "c"; both end labels are part of the result
s3_subset = s3.loc["a":"c"]
# Equivalent boolean-mask form: s3[(s3.index >= "a") & (s3.index <= "c")]

# Show the sliced Series
print(s3_subset)

a    3.0
b    9.0
c    5.0
dtype: float64


In [7]:
# Keep only the entries of s3 whose value is greater than 4
s3_subset_1 = s3.loc[s3 > 4]

# Show the filtered entries
print(s3_subset_1)

print("------------------------")

# The comparison alone yields a boolean Series (True where the value exceeds 4),
# i.e. the mask that was used for the filtering above
s3_subset_2 = s3.gt(4)

# Show the boolean mask
print(s3_subset_2)

b    9.0
c    5.0
dtype: float64
------------------------
a    False
b     True
c     True
dtype: bool


In [None]:
# Calculating the arithmetic mean of the values currently held in s3.
# The result depends on the state of s3 when this cell runs: when the notebook is
# executed top to bottom, "b" has already been updated to 9 by an earlier cell.
# NOTE(review): this cell has no recorded output (In [None]) — re-run to populate it.
s3_mean = s3.mean()

# Printing the mean of the Series s3
print(f"s3_mean = {s3_mean}")

# DataFrame

In [8]:
# Each inner list becomes one row; with no labels given, rows and columns are numbered from 0
df = pd.DataFrame(data=[[1, 2, 3], [4, 5, 6]])

# Show the resulting 2 x 3 table
df

Unnamed: 0,0,1,2
0,1,2,3
1,4,5,6


In [9]:
# 3 x 3 NumPy array holding the integers 1..9 in row-major order
ar = np.arange(1, 10).reshape(3, 3)

# Wrap the array in a DataFrame, supplying row labels (index)
# and column names explicitly
df = pd.DataFrame(ar, index=["r1", "r2", "r3"], columns=list("abc"))

# Show the labelled table
df

Unnamed: 0,a,b,c
r1,1,2,3
r2,4,5,6
r3,7,8,9


In [10]:
import pandas as pd  # Importing the pandas module

# Three Series sharing the same row labels.
# Note: these reuse (and therefore replace) the names s1, s2 and s3 from the Series section.
s1 = pd.Series(data=[1, 2], index=["first", "second"])
s2 = pd.Series(data=[1, 3], index=["first", "second"])
s3 = pd.Series(data=[2, 3], index=["first", "second"])

# Each dictionary key becomes a column name and each Series supplies that column's values
df = pd.DataFrame({"a": s1, "b": s2, "c": s3})

# Replace the row labels taken over from the Series with new ones
df.index = ["row1", "row2"]

# Total number of cells in the DataFrame
print(f"Size = {df.size}")

# (number of rows, number of columns)
print(f"Shape = {df.shape}")

# Row labels
print(f"Index = {df.index}")

# Column labels
print(f"Columns = {df.columns}")


Size = 6
Shape = (2, 3)
Index = Index(['row1', 'row2'], dtype='object')
Columns = Index(['a', 'b', 'c'], dtype='object')


In [11]:
# Display the current contents of the DataFrame df (the bare name as the last
# expression of the cell is shown as the cell's output; nothing is printed explicitly)
df

Unnamed: 0,a,b,c
row1,1,1,2
row2,2,3,3


In [12]:
# Select column 'a' using a one-element list of labels, so the result
# is a one-column DataFrame rather than a Series
column_a = df.loc[:, ["a"]]

# Show the selected column
column_a

Unnamed: 0,a
row1,1
row2,2


In [13]:
# Select columns 'a' and 'b' (all rows) by passing a list of column labels
selected_columns = df.loc[:, ["a", "b"]]

# Show the selected columns
selected_columns

Unnamed: 0,a,b
row1,1,1
row2,2,3


In [14]:
# Add column 'd' whose values are the values of column 'c' increased by 1;
# the assignment modifies df in place
df["d"] = df["c"].add(1)

# Show the DataFrame with the new column
df

Unnamed: 0,a,b,c,d
row1,1,1,2,3
row2,2,3,3,4


In [15]:
# Double every value; df is rebound to the new, scaled DataFrame
df = df.mul(2)

# Show the scaled DataFrame
df

Unnamed: 0,a,b,c,d
row1,2,2,4,6
row2,4,6,6,8


In [16]:
# Replace column 'c' with a Series that only has a value for 'row1'.
# Assignment matches on row labels, so 'row2' has no matching entry and ends up empty (NaN)
df['c'] = pd.Series([7.3], index=['row1'])

# Show the DataFrame after the replacement
df

Unnamed: 0,a,b,c,d
row1,2,2,7.3,6
row2,4,6,,8


In [17]:
# Remove column 'c'; drop returns a new DataFrame, which is bound back to df
df = df.drop(columns=['c'])

# Show the DataFrame without column 'c'
df

Unnamed: 0,a,b,d
row1,2,2,6
row2,4,6,8


In [23]:
# Selecting the first two rows and the first two columns by integer position.
# iloc is indexed as [row_selector, column_selector]; a slice stops before its end
# position, so [:2, :2] keeps positions 0 and 1 on both axes.
# df has only two rows at this point, so every row is kept together with columns 'a' and 'b'.
selected_rows = df.iloc[:2, :2]

# Displaying the selected block
selected_rows

Unnamed: 0,a,b
row1,2,2
row2,4,6


In [24]:
# Build df_2 from two rows of integers, labelling the rows x/y and the columns a..e
df_2 = pd.DataFrame(
    data=[
        [1, 2, 3, 4, 5],
        [6, 7, 8, 9, 10],
    ],
    index=["x", "y"],
    columns=["a", "b", "c", "d", "e"],
)

# Show df_2
df_2

Unnamed: 0,a,b,c,d,e
x,1,2,3,4,5
y,6,7,8,9,10


In [25]:
# First take every element modulo 2 (% binds tighter than -), then subtract that
# remainder from 1: odd values become 0 and even values become 1
df_2 = 1 - (df_2 % 2)

# Show the transformed df_2
df_2

Unnamed: 0,a,b,c,d,e
x,0,1,0,1,0
y,1,0,1,0,1


In [26]:
# Element-wise equality test against 1; df_2 is rebound to the resulting boolean DataFrame
df_2 = df_2.eq(1)

# Show the boolean DataFrame
df_2

Unnamed: 0,a,b,c,d,e
x,False,True,False,True,False
y,True,False,True,False,True


### 📊 Groupby

In [4]:
import pandas as pd

# Sample data: six observations, each belonging to category 'A' or 'B'
data = dict(
    Category=['A', 'B', 'A', 'B', 'A', 'B'],
    Value=[10, 20, 30, 15, 25, 35],
)

df = pd.DataFrame(data)

# Split the rows by 'Category' and add up the 'Value' entries within each group;
# the group labels become the index of the result
grouped = df.groupby(by='Category').sum()

print(grouped)

# Expected output:
#           Value
# Category
# A            65
# B            70

          Value
Category       
A            65
B            70
