# In [None]:
import pandas as pd
import numpy as np

# Sample data used by the questions below: course titles and their durations.
course_name = ['Data Science', 'Machine Learning', 'Big Data', 'Data Engineer']
duration = [2, 3, 6, 4]
df = pd.DataFrame(dict(course_name=course_name, duration=duration))

# Q1. Show the second row of df (positions start at 0, so it sits at position 1).
print("Q1. Data in the second row of the dataframe:", df.iloc[1], sep="\n")

# Q2. Difference between loc and iloc
"""
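- .loc[]: Label-based indexing. Rows and columns are selected by their index/column labels; label slices include the end label.
- .iloc[]: Integer position-based indexing. Rows and columns are selected by their 0-based positions; position slices exclude the end position.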
"""

# Q3. Reorder the rows of df by label, then compare label vs. position lookups
reindex = [3, 0, 1, 2]
new_df = df.reindex(index=reindex)
print("\nQ3. Reindexed DataFrame:", new_df, sep="\n")
# .loc[2] selects the row whose index label is 2 (now the last row).
print("\nnew_df.loc[2]:", new_df.loc[2], sep="\n")
# .iloc[2] selects the row at position 2 (the third row, whose label is 1).
print("\nnew_df.iloc[2]:", new_df.iloc[2], sep="\n")

# Q4. Statistical measurements
# Small all-numeric dataframe used to demonstrate the statistics below.
df1 = pd.DataFrame({
    'column_1': [10, 20, 30, 40, 50],
    'column_2': [5, 15, 25, 35, 45],
})

print("\nQ4. Statistical measurements:")
print("Mean of each column:")
print(df1.mean())
print("\nStandard deviation of column 'column_2':")
print(df1['column_2'].std())

# Q5. Replace data in column_2 and find mean
# column_2 starts out as an integer column. Writing a string into it relies on
# silent upcasting, which newer pandas releases warn about or reject, so cast
# the column to object explicitly before storing the mixed value.
df1['column_2'] = df1['column_2'].astype(object)
df1.loc[1, 'column_2'] = 'String'
try:
    mean_column_2 = df1['column_2'].astype(float).mean()
except (ValueError, TypeError) as e:
    # 'String' cannot be converted to float, so no mean can be computed.
    print("\nQ5. Error:", e)
else:
    # Only reached if every value in column_2 converts to float.
    print("\nQ5. Mean of column_2:", mean_column_2)

# Q6. Window functions in pandas
"""
Window functions perform calculations over a sliding or growing set of consecutive data points. They include:
- Rolling window functions (e.g., .rolling())
- Expanding window functions (e.g., .expanding())
- Exponentially weighted (EWM) window functions (e.g., .ewm())
"""

# Q7. Show the current year and month, formatted as YYYY-MM
print("\nQ7. Current month and year:", pd.Timestamp.now().strftime('%Y-%m'), sep="\n")

# Q8. Calculate time difference between two dates
from datetime import datetime  # NOTE(review): not used in this section; kept in case other code relies on it

# Prompt for both dates; pd.to_datetime raises if a value cannot be parsed.
date1 = input("Enter the first date (YYYY-MM-DD): ")
date2 = input("Enter the second date (YYYY-MM-DD): ")

date1 = pd.to_datetime(date1)
date2 = pd.to_datetime(date2)

# Elapsed time from the first date to the second (negative if date2 is earlier).
delta = date2 - date1
total_seconds = delta.total_seconds()
days = delta.days
# Each figure below is the whole difference expressed in that unit
# (floored), not a days/hours/minutes breakdown.
hours = total_seconds // 3600
minutes = total_seconds // 60

# total_seconds() is a float, so format hours/minutes without decimals to
# show "48 hours" rather than "48.0 hours".
print(f"\nQ8. Difference: {days} days, {hours:.0f} hours, {minutes:.0f} minutes")

# Q9. Read CSV and convert column to categorical type
def process_categorical_csv(file_path, column_name, categories):
    """Load a CSV and print its rows sorted by an ordered categorical column.

    Args:
        file_path: Location of the CSV, passed straight to ``pd.read_csv``.
        column_name: Name of the column to convert to an ordered categorical.
        categories: Category values listed in ascending sort order.

    Returns:
        None. The sorted frame is printed to stdout.
    """
    frame = pd.read_csv(file_path)
    # An ordered categorical sorts by position in `categories`, not alphabetically.
    ordered_values = pd.Categorical(
        frame[column_name], categories=categories, ordered=True
    )
    frame[column_name] = ordered_values
    print("\nQ9. Sorted DataFrame:")
    sorted_frame = frame.sort_values(by=column_name)
    print(sorted_frame)

# Q10. Read CSV and visualize sales data with stacked bar chart
def plot_sales_csv(file_path):
    """Read a sales CSV and display it as a stacked bar chart.

    Args:
        file_path: Location of the CSV, passed straight to ``pd.read_csv``.
            The data must contain a 'Date' column, which is used as the
            index (x-axis) of the chart.

    Returns:
        None. The chart is shown via ``plt.show()``.

    Raises:
        KeyError: If the CSV has no 'Date' column.
    """
    # pyplot is not imported at module level in this file, so bring it into
    # scope here. pandas' plotting API already requires matplotlib.
    import matplotlib.pyplot as plt

    df = pd.read_csv(file_path)
    # Label the Axes returned by pandas directly instead of relying on
    # pyplot's implicit "current axes".
    ax = df.set_index('Date').plot(kind='bar', stacked=True)
    ax.set_title('Sales Data by Product Category')
    ax.set_xlabel('Date')
    ax.set_ylabel('Sales')
    plt.show()

# Q11. Calculate mean, median, and mode of test scores
def analyze_student_scores(file_path):
    """Print the mean, median and mode(s) of the 'Test Score' column of a CSV.

    Args:
        file_path: Location of the CSV, passed straight to ``pd.read_csv``.
            The data must contain a 'Test Score' column.

    Returns:
        None. A three-row summary table is printed to stdout.
    """
    scores = pd.read_csv(file_path)['Test Score']

    # Insertion order fixes the row order of the summary table.
    stats = {
        'Mean': scores.mean(),
        'Median': scores.median(),
        # There can be several modes; show them as one comma-separated string.
        'Mode': ', '.join(str(value) for value in scores.mode().tolist()),
    }
    result = pd.DataFrame({
        'Statistic': list(stats.keys()),
        'Value': list(stats.values()),
    })

    print("\nQ11. Mean, Median, and Mode of Test Scores:")
    print(result)

# Example file paths (adjust these to your actual file locations):
# process_categorical_csv('categorical_data.csv', 'Category', ['Low', 'Medium', 'High'])
# plot_sales_csv('sales_data.csv')
# analyze_student_scores('student_data.csv')
