In [2]:
import pandas as pd
import numpy as np

#  Create DataFrame
data = {
    'Name': ['Alice', 'Bob', 'Charlie', 'David'],
    'Age': [22, 27, 22, 32],
    'Salary': [50000, 60000, 55000, 70000]
}
df = pd.DataFrame(data)

# Display DataFrame
print("DataFrame:")
print(df)

# 1 Series example (label)
series_example = pd.Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd'],)
print("\nSeries Example:")
print(series_example)

# 2  DataFrame example (locate, index)

print("\nDataFrame Example:")
print("Accessing 'Age' column:")
print(df['Age'])

print("\nAccessing row with index 1:")
print(df.loc[1])

print("\nAccessing specific cell (Name of row 2):")
print(df.at[2, 'Name'])
print(df.to_string(index=False))


# 3 Read CSV example (head, tail, info, MAX_ROWS)
df_csv = pd.read_csv('data.csv')
print("\nDataFrame from CSV (head):")
print(df_csv.head())

print("\nDataFrame from CSV (tail):")
print(df_csv.tail())

print("\nDataFrame from CSV (info):")
print(df_csv.info())

# Set maximum number of rows to display
pd.set_option('display.max_rows', 10)
print("\nDataFrame with MAX_ROWS set to 10:")
print(df_csv)

# 4 Analyzing Data example
average_age = df['Age'].mean()
print("\nAverage Age:", average_age)

# 5 Cleaning Data example
df_cleaned = df.dropna()
print("\nCleaned DataFrame (dropped NA):")
print(df_cleaned)

# 6 Cleaning Data example (filling NA)
df_filled = df.fillna({'Salary': 0}, inplace=True)
print("\nDataFrame with filled NA:")
print(df_filled)

# 7 Cleaning Empty Cells example
df_empty = df.replace('', np.nan)
print("\nDataFrame with empty cells replaced with NaN:")
print(df_empty)

# Replace NaN with mean of 'Age' column
mean_age = df_empty['Age'].mean()
df_empty['Age'].fillna(mean_age, inplace=True)
print("\nDataFrame with NaN in 'Age' replaced with mean:")
print(df_empty)

# Replace NaN with median of 'Age' column
median_age = df_empty['Age'].median()
df_empty['Age'].fillna(median_age, inplace=True)
print("\nDataFrame with NaN in 'Age' replaced with median:")
print(df_empty)

#REPLACE NaN with mode of 'Age' column
mode_age = df_empty['Age'].mode()[0]
df_empty['Age'].fillna(mode_age, inplace=True)
print("\nDataFrame with NaN in 'Age' replaced with mode:")
print(df_empty)

# 8 Cleaning Wrong Format example
df['Age'] = df['Age'].astype(str)
print("\nDataFrame with Age as string:")
print(df)

# 9 Cleaning Wrong Data example
df_corrected = df[df['Age'].apply(lambda x: x.isdigit())]
print("\nDataFrame with corrected Age data:")
print(df_corrected)

# 10 Removing Duplicates example
df_no_duplicates = df.drop_duplicates()
print("\nDataFrame with duplicates removed:")
print(df_no_duplicates)

# 11 Correlations example
correlation = df.select_dtypes(include=[np.number]).corr()
print("\nDataFrame with correlations:")
print(correlation)

# Save the DataFrame to a CSV file
df.to_csv('output.csv', index=False)
print("\nDataFrame saved to 'output.csv'.")

DataFrame:
      Name  Age  Salary
0    Alice   22   50000
1      Bob   27   60000
2  Charlie   22   55000
3    David   32   70000

Series Example:
a    1
b    2
c    3
d    4
dtype: int64

DataFrame Example:
Accessing 'Age' column:
0    22
1    27
2    22
3    32
Name: Age, dtype: int64

Accessing row with index 1:
Name        Bob
Age          27
Salary    60000
Name: 1, dtype: object

Accessing specific cell (Name of row 2):
Charlie
   Name  Age  Salary
  Alice   22   50000
    Bob   27   60000
Charlie   22   55000
  David   32   70000

DataFrame from CSV (head):
  Name\tAge\tSalary\tExperience
0           Alice\t24\t50000\t2
1             Bob\t27\t60000\t5
2         Charlie\t22\t55000\t0
3          David\t32\t70000\t10
4             Eve\t29\t80000\t7

DataFrame from CSV (tail):
   Name\tAge\tSalary\tExperience
8              Ian\t31\t90000\t8
9             Jack\t25\t62000\t1
10           Kathy\t23\t58000\t2
11           Liam\t34\t95000\t12
12           Mia\t35\t100000\t15

DataFrame