<a href="https://colab.research.google.com/github/Tahira148/Data-Processing---Python-Development-and-Pandas/blob/main/Assignment_4.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Python and Pandas Assessment Notebook

import pandas as pd
import numpy as np
from functools import reduce


# Task 1: Create a DataFrame using Pandas

print("Task 1: Create a DataFrame\n")

# Column-oriented input: each keyword becomes a column label and each list
# supplies that column's values, row by row.
data = dict(
    Name=['Alice', 'Bob', 'Charlie', 'David'],
    Age=[25, 30, 35, 28],
    City=['New York', 'San Francisco', 'Los Angeles', 'Chicago'],
)
df = pd.DataFrame(data)

# Show the table first, then the dtype pandas inferred for every column.
print(df)
print("\nData types:\n", df.dtypes)
print("*" * 60)



# Task 2: Row and Column Manipulation

print("Task 2: Drop 'City' column\n")

# The result of drop() is bound to a new name, so `df` keeps its 'City'
# column and only `df_dropped` lacks it.
df_dropped = df.drop('City', axis=1)
print(df_dropped)
print("\nRemaining columns:", list(df_dropped.columns))
print("*" * 60)



# Task 3: Handling Null Values

print("Task 3: Handling Null Values\n")

# Sample frame with exactly one missing entry in each column.
df_null = pd.DataFrame(dict(
    Name=['Alice', None, 'Charlie'],
    Age=[25, np.nan, 35],
    City=['New York', 'Chicago', None],
))

print("Original DataFrame with nulls:")
print(df_null)

# Per-column replacement values: the text columns receive the placeholder
# 'Unknown', the numeric 'Age' column receives 0.
df_filled = df_null.fillna(value={'Name': 'Unknown', 'Age': 0, 'City': 'Unknown'})
print("\nDataFrame after filling nulls:")
print(df_filled)
print("*" * 60)



# Task 4: GroupBy and Describe

print("Task 4: GroupBy and Describe\n")
df_group = pd.DataFrame(dict(
    Category=list('ABABAC'),
    Value=[10, 20, 15, 25, 30, 35],
))

# describe() on the grouped 'Value' column produces one row of summary
# statistics (count, mean, std, min, quartiles, max) per distinct category.
grouped = df_group.groupby(by='Category')['Value'].describe()
print(grouped)
print("\nInterpretation: Each category shows summary statistics for the 'Value' column.")
print("*" * 60)


# Task 5: Concatenation and Merging

print("Task 5: Concatenation and Merging\n")
df1 = pd.DataFrame(dict(A=[1, 2], B=[3, 4]))
df2 = pd.DataFrame(dict(A=[5, 6], B=[7, 8]))
df3 = pd.DataFrame(dict(C=[9, 10, 11, 12], D=[13, 14, 15, 16]))

# Stack the two frames row-wise; ignore_index renumbers the rows 0..3
# instead of repeating each input's own 0..1 labels.
df_concat = pd.concat((df1, df2), axis='index', ignore_index=True)
print("Concatenated DataFrame:\n", df_concat)

# Column-wise concat (not a key-based merge): df3's columns are placed beside
# the stacked frame, with rows paired up by index label.
df_merged = pd.concat((df_concat, df3.reset_index(drop=True)), axis='columns')
print("\nMerged DataFrame:\n", df_merged)
print("*" * 60)



# Task 6: Tuples and Sets

print("Task 6: Tuples and Sets \n")
fruits = ('apple', 'banana', 'cherry')
numbers = {1, 2, 3, 4, 5}

print("Original tuple:", fruits)
print("Original set:", numbers)

# Tuples are immutable, so they offer no append/add method. Concatenation
# cannot fail here, though: `+=` builds a brand-new tuple and rebinds the
# name `fruits` to it, so no exception handling is needed.
fruits += ('orange',)
print("\nNew tuple after adding element:", fruits)

# Sets are mutable; add() inserts the element into the existing set object.
numbers.add(6)
print("Set after adding an element:", numbers)
print("*" * 60)



# Task 7: Dictionaries

print("Task 7: Dictionaries ")
students = dict(Alice=85, Bob=90, Charlie=78)
print("Original dictionary:", students)

# One update() call overwrites Bob's existing score and appends David as a
# new key at the end of the dictionary.
students.update(Bob=95, David=88)
print("Updated dictionary:", students)
print("*" * 60)


# Task 8: Functions and Lambda

print("Task 8: Functions and Lambda\n")

def square(x):
    """Return ``x`` raised to the power of two (named-function variant)."""
    squared = x ** 2
    return squared

# Anonymous counterpart of square(), bound to a name so both variants can be
# called side by side below.
square_lambda = lambda x: x ** 2

# Both callables are run on the same inputs to show they agree.
for n in (3, 5):
    print(f"Square of {n} using function: {square(n)}")
    print(f"Square of {n} using lambda: {square_lambda(n)}")
print("*" * 60)



# Task 9: Iterators and Generators

print("Task 9: Iterators and Generators\n")

class EvenIterator:
    """Iterator yielding 0, 2, 4, ... for as long as its counter is below ``limit``."""

    def __init__(self, limit):
        # `num` counts how many values have been produced so far.
        self.num = 0
        self.limit = limit

    def __iter__(self):
        """Return the object itself; it is its own iterator."""
        return self

    def __next__(self):
        """Return the next even number, or raise StopIteration when done."""
        if not self.num < self.limit:
            raise StopIteration
        current = self.num * 2
        self.num += 1
        return current

print("Iterator output:")
for num in EvenIterator(5):
    print(num, end=" ")

print("\n\nGenerator output:")
def even_generator(limit):
    """Lazily yield ``i * 2`` for every ``i`` in ``range(limit)``."""
    yield from (index * 2 for index in range(limit))

# Same sequence as the class-based iterator, produced by a generator function.
for num in even_generator(5):
    print(num, end=" ")
print("\n" + "*" * 60)



# Task 10: Map, Reduce, and Filter

print("Task 10: Map, Reduce, and Filter\n")
numbers = [1, 2, 3, 4, 5]

# map transforms every element, reduce folds the list into one running
# product, and filter keeps only the elements that leave no remainder when
# divided by two.
squared = [*map(lambda value: value ** 2, numbers)]
product = reduce(lambda running, value: running * value, numbers)
evens = [*filter(lambda value: not value % 2, numbers)]

print("Original numbers:", numbers)
print("Squared (map):", squared)
print("Product (reduce):", product)
print("Even numbers (filter):", evens)
print("*" * 60)



# Task 11: Object-Oriented Programming - Rectangle Class

print("Task 11: Rectangle Class\n")

class Rectangle:
    """Rectangle described by its two side lengths, ``length`` and ``width``."""

    def __init__(self, length, width):
        self.length, self.width = length, width

    def area(self):
        """Return ``length`` multiplied by ``width``."""
        length, width = self.length, self.width
        return length * width

    def perimeter(self):
        """Return twice the sum of ``length`` and ``width``."""
        side_sum = self.length + self.width
        return 2 * side_sum

# Two sample rectangles used to exercise both methods.
rect1 = Rectangle(4, 5)
rect2 = Rectangle(7, 3)

print(f"Rectangle 1 - Area: {rect1.area()}, Perimeter: {rect1.perimeter()}")
print(f"Rectangle 2 - Area: {rect2.area()}, Perimeter: {rect2.perimeter()}")
print("*" * 60)



# Task 12: Pandas Data Analysis

print("Task 12: Pandas Data Analysis\n")
df_employees = pd.DataFrame({
    'Name': ['John', 'Jane', 'Bob', 'Alice', 'Charlie'],
    'Department': ['IT', 'HR', 'IT', 'Finance', 'HR'],
    'Salary': [55000, 65000, 70000, 60000, 58000]
})

# 1. Average salary by department: one mean per distinct 'Department' value.
avg_salary = df_employees.groupby('Department')['Salary'].mean()
print("Average salary by department:\n", avg_salary)

# 2. Employees with salary > 60000 (strictly greater, so exactly 60000 is
#    excluded). Rows and column are selected in a single .loc call instead of
#    chaining [mask]['Name'], which performs two separate indexing steps.
high_salary = df_employees.loc[df_employees['Salary'] > 60000, 'Name']
print("\nEmployees with salary > 60000:\n", high_salary.tolist())

# 3. Add 'Bonus' column (10% of salary); this adds the column to
#    df_employees in place.
df_employees['Bonus'] = df_employees['Salary'] * 0.10
print("\nDataFrame with Bonus column:\n", df_employees)



Task 1: Create a DataFrame

      Name  Age           City
0    Alice   25       New York
1      Bob   30  San Francisco
2  Charlie   35    Los Angeles
3    David   28        Chicago

Data types:
 Name    object
Age      int64
City    object
dtype: object
************************************************************
Task 2: Drop 'City' column

      Name  Age
0    Alice   25
1      Bob   30
2  Charlie   35
3    David   28

Remaining columns: ['Name', 'Age']
************************************************************
Task 3: Handling Null Values

Original DataFrame with nulls:
      Name   Age      City
0    Alice  25.0  New York
1     None   NaN   Chicago
2  Charlie  35.0      None

DataFrame after filling nulls:
      Name   Age      City
0    Alice  25.0  New York
1  Unknown   0.0   Chicago
2  Charlie  35.0   Unknown
************************************************************
Task 4: GroupBy and Describe

          count       mean        std   min    25%   50%    75%   max
Category