In [1]:
import pandas as pd
import numpy as np

print("Pandas Version:", pd.__version__)

Pandas Version: 2.3.1


In [2]:
# Creating a Series
data = [1, 2, 3, 4, 5]
series = pd.Series(data)
print("Series:\n", series)

Series:
 0    1
1    2
2    3
3    4
4    5
dtype: int64


In [3]:
# Arithmetic Operations
series2 = series + 10
print("\nSeries after adding 10:\n", series2)

# Filtering
filtered_series = series[series > 2]
print("\nFiltered Series (values > 2):\n", filtered_series)

# Statistical Calculations
mean_value = series.mean()
print("\nMean Value of Series:", mean_value)


Series after adding 10:
 0    11
1    12
2    13
3    14
4    15
dtype: int64

Filtered Series (values > 2):
 2    3
3    4
4    5
dtype: int64

Mean Value of Series: 3.0


In [4]:
# Creating a DataFrame
data = {
    'Name': ['Alice', 'Bob', 'Charlie'],
    'Age': [25, 30, 35],
    'City': ['New York', 'Los Angeles', 'Chicago']
}
df = pd.DataFrame(data)
print("\nDataFrame:\n", df)


DataFrame:
       Name  Age         City
0    Alice   25     New York
1      Bob   30  Los Angeles
2  Charlie   35      Chicago


In [5]:
# Accessing Columns
print("\nAccessing Name column:\n", df[['Name']])

# Adding a New Column
df['Salary'] = [70000, 80000, 90000]
print("\nDataFrame with Salary:\n", df)

# Dropping a Column
df = df.drop('City', axis=1)
print("\nDataFrame after dropping City:\n", df)

# Return row 0
print("\nRow 0:\n", df.loc[[0]])

# Return row 0 and 1
print("\nRow 0 and 1:\n", df.loc[[0, 1]])


Accessing Name column:
       Name
0    Alice
1      Bob
2  Charlie

DataFrame with Salary:
       Name  Age         City  Salary
0    Alice   25     New York   70000
1      Bob   30  Los Angeles   80000
2  Charlie   35      Chicago   90000

DataFrame after dropping City:
       Name  Age  Salary
0    Alice   25   70000
1      Bob   30   80000
2  Charlie   35   90000

Row 0:
     Name  Age  Salary
0  Alice   25   70000

Row 0 and 1:
     Name  Age  Salary
0  Alice   25   70000
1    Bob   30   80000


In [6]:
data = {
  "calories": [420, 380, 390],
  "duration": [50, 40, 45]
}
df2 = pd.DataFrame(data, index = ["day1", "day2", "day3"])
print("\nDataFrame with Named Index:\n", df2)


DataFrame with Named Index:
       calories  duration
day1       420        50
day2       380        40
day3       390        45


In [7]:
# Example DataFrame
Biodata = {
    'Name': ['John', 'Emily', 'Mike', 'Lisa'],
    'Age': [28, 23, 35, 31],
    'Gender': ['M', 'F', 'M', 'F']
}
df3 = pd.DataFrame(Biodata)

# Save to CSV
df3.to_csv('Biodata.csv', index=False)
print("\nDataFrame saved to Biodata.csv")

# Read CSV back
dat = pd.read_csv("Biodata.csv")
print("\nRead CSV File:\n", dat)


DataFrame saved to Biodata.csv

Read CSV File:
     Name  Age Gender
0   John   28      M
1  Emily   23      F
2   Mike   35      M
3   Lisa   31      F


In [8]:
print("\nData Info:")
print(dat.info())

print("\nHead:\n", dat.head())
print("\nTail:\n", dat.tail())
print("\nDescribe:\n", dat.describe())


Data Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4 entries, 0 to 3
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Name    4 non-null      object
 1   Age     4 non-null      int64 
 2   Gender  4 non-null      object
dtypes: int64(1), object(2)
memory usage: 228.0+ bytes
None

Head:
     Name  Age Gender
0   John   28      M
1  Emily   23      F
2   Mike   35      M
3   Lisa   31      F

Tail:
     Name  Age Gender
0   John   28      M
1  Emily   23      F
2   Mike   35      M
3   Lisa   31      F

Describe:
              Age
count   4.000000
mean   29.250000
std     5.057997
min    23.000000
25%    26.750000
50%    29.500000
75%    32.000000
max    35.000000


In [9]:
print("\nSelect Name column:\n", dat[['Name']])
print("\nSelect Name and Gender columns:\n", dat[['Name','Gender']])
print("\nSelect row with index 1:\n", dat.loc[[1]])


Select Name column:
     Name
0   John
1  Emily
2   Mike
3   Lisa

Select Name and Gender columns:
     Name Gender
0   John      M
1  Emily      F
2   Mike      M
3   Lisa      F

Select row with index 1:
     Name  Age Gender
1  Emily   23      F


In [10]:
dat['Age'] = dat['Age'] * 2  # Modify a column
print("\nModified Age column:\n", dat)

dat['NewCol'] = dat['Age'] + 5  # Create new column
print("\nDataFrame with NewCol:\n", dat)

dat = dat.drop(columns=['NewCol'])  # Drop a column
print("\nAfter dropping NewCol:\n", dat)

dat = dat.drop(index=[0])  # Drop a row
print("\nAfter dropping row 0:\n", dat)


Modified Age column:
     Name  Age Gender
0   John   56      M
1  Emily   46      F
2   Mike   70      M
3   Lisa   62      F

DataFrame with NewCol:
     Name  Age Gender  NewCol
0   John   56      M      61
1  Emily   46      F      51
2   Mike   70      M      75
3   Lisa   62      F      67

After dropping NewCol:
     Name  Age Gender
0   John   56      M
1  Emily   46      F
2   Mike   70      M
3   Lisa   62      F

After dropping row 0:
     Name  Age Gender
1  Emily   46      F
2   Mike   70      M
3   Lisa   62      F


In [11]:
data = {
    'A': [np.nan, 2, 3, 4, 5, 6, 7, 8, 9, 10],
    'B': np.random.normal(50, 15, 10),
    'C': np.random.rand(10) * 100,
    'D': np.linspace(1, 10, 10),
    'E': np.logspace(1, 2, 10)
}
df_task = pd.DataFrame(data)
print("\nTask DataFrame:\n", df_task)


Task DataFrame:
       A          B          C     D           E
0   NaN  57.367060  40.574349   1.0   10.000000
1   2.0  49.280104  44.734003   2.0   12.915497
2   3.0  68.601940  65.612411   3.0   16.681005
3   4.0  47.289669  27.095635   4.0   21.544347
4   5.0  30.676522  12.000101   5.0   27.825594
5   6.0  65.149321  94.912368   6.0   35.938137
6   7.0  46.614768  34.772780   7.0   46.415888
7   8.0  40.653001  88.727640   8.0   59.948425
8   9.0  56.177386  56.758594   9.0   77.426368
9  10.0  22.336022  12.244709  10.0  100.000000


In [14]:
dat = pd.read_csv("data.csv")
print(dat.info())
# shows first and last five rows
print(dat.head())
print(dat.tail())
print(dat.describe())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 13 entries, 0 to 12
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Name    13 non-null     object
 1   City    13 non-null     object
 2   Number  13 non-null     int64 
dtypes: int64(1), object(2)
memory usage: 444.0+ bytes
None
  Name City  Number
0    A    M       1
1    B    N       4
2    C    V       5
3    D    B       7
4    E    J       8
   Name City  Number
8     I    C       6
9     J    X       7
10    K    Z       3
11    L    S       4
12    M    R       6
          Number
count  13.000000
mean    5.538462
std     2.183857
min     1.000000
25%     4.000000
50%     6.000000
75%     7.000000
max     9.000000


In [15]:
print(dat[['Name']])
print(dat[['Name','Number']])
print(dat.loc[[1]])

   Name
0     A
1     B
2     C
3     D
4     E
5     F
6     G
7     H
8     I
9     J
10    K
11    L
12    M
   Name  Number
0     A       1
1     B       4
2     C       5
3     D       7
4     E       8
5     F       9
6     G       7
7     H       5
8     I       6
9     J       7
10    K       3
11    L       4
12    M       6
  Name City  Number
1    B    N       4


In [16]:
# Accessing Columns
print("\nAccessing Name column:\n", df[['Name']])


Accessing Name column:
       Name
0    Alice
1      Bob
2  Charlie


In [17]:
# Adding a New Column
df['Salary'] = [70000, 80000, 90000]
print("\nDataFrame with Salary:\n", df)


DataFrame with Salary:
       Name  Age  Salary
0    Alice   25   70000
1      Bob   30   80000
2  Charlie   35   90000


In [18]:
# Dropping a Column
df = df.drop('City', axis=1)
print("\nDataFrame after dropping City:\n", df)

KeyError: "['City'] not found in axis"

In [None]:
# Return row 0
print("\nRow 0:\n", df.loc[[0]])

In [None]:
# Return row 0 and 1
print("\nRow 0 and 1:\n", df.loc[[0, 1]])

In [13]:
dat = pd.read_csv("data.csv")
print(dat)

   Name City  Number
0     A    M       1
1     B    N       4
2     C    V       5
3     D    B       7
4     E    J       8
5     F    G       9
6     G    F       7
7     H    D       5
8     I    C       6
9     J    X       7
10    K    Z       3
11    L    S       4
12    M    R       6
