In [None]:
#Q1
import pandas as pd

# Sample data: one entry per course, paired with its duration
course_name = ['Data Science', 'Machine Learning', 'Big Data', 'Data Engineer']
duration = [2, 3, 6, 4]
df = pd.DataFrame(dict(course_name=course_name, duration=duration))

# iloc is position-based and zero-indexed, so position 1 is the second row
second_row = df.iloc[1, :]
print("Data in the second row:", second_row, sep="\n")


In [None]:
#Q2
'''
loc and iloc are both used to select data from pandas DataFrames. loc is label-based and includes the
endpoint when slicing (df.loc[1:3] returns the rows labelled 1 through 3), while iloc is
integer-position-based and excludes the endpoint when slicing (df.iloc[1:3] returns the rows at positions 1 and 2).
'''

In [None]:
#Q3
import pandas as pd
import numpy as np

# Define columns and indices (note the row labels start at 1, not 0)
columns = ['column_1', 'column_2', 'column_3', 'column_4', 'column_5', 'column_6']
indices = [1, 2, 3, 4, 5, 6]

# Create a DataFrame with random values in [0, 1). A seeded generator keeps
# the printed numbers identical on every Restart & Run All.
rng = np.random.default_rng(seed=42)
df1 = pd.DataFrame(rng.random((6, 6)), columns=columns, index=indices)

# Define the new index order
reindex = [3, 0, 1, 2]

# Reindexing the DataFrame df1. Label 0 does not exist in df1, so reindex()
# adds it as a row filled with NaN instead of raising.
new_df = df1.reindex(reindex)

# loc is label-based: this is the row whose index label is 2
# (the fourth row of new_df)
print("Output for new_df.loc[2]:")
print(new_df.loc[2])

# iloc is position-based: this is the third row of new_df,
# i.e. the row labelled 1
print("\nOutput for new_df.iloc[2]:")
print(new_df.iloc[2])


In [None]:
#Q4
import pandas as pd
import numpy as np

# Rebuild df1 inside this cell so it also runs on a fresh kernel. The seeded
# generator makes the reported statistics reproducible from run to run.
columns = ['column_1', 'column_2', 'column_3', 'column_4', 'column_5', 'column_6']
indices = [1, 2, 3, 4, 5, 6]
rng = np.random.default_rng(seed=42)
df1 = pd.DataFrame(rng.random((6, 6)), columns=columns, index=indices)

# (i) Mean of each column (DataFrame.mean() reduces over rows by default)
column_means = df1.mean()
print("Mean of each column:")
print(column_means)
print()

# (ii) Standard deviation of column 'column_2'
# (pandas computes the sample standard deviation, ddof=1, by default)
std_column_2 = df1['column_2'].std()
print("Standard deviation of column 'column_2':")
print(std_column_2)



In [None]:
#Q5
import pandas as pd
import numpy as np

# Rebuild df1 inside this cell so it also runs on a fresh kernel. The seeded
# generator makes the values reproducible from run to run.
columns = ['column_1', 'column_2', 'column_3', 'column_4', 'column_5', 'column_6']
indices = [1, 2, 3, 4, 5, 6]
rng = np.random.default_rng(seed=42)
df1 = pd.DataFrame(rng.random((6, 6)), columns=columns, index=indices)

# Replace data in the second row (index label 2) of 'column_2' with a string
# variable. The column is cast to object first: writing a str into a float64
# column relies on an implicit upcast that newer pandas versions warn about
# (and are slated to reject).
string_data = "Replaced String"
df1['column_2'] = df1['column_2'].astype(object)
df1.loc[2, 'column_2'] = string_data

# Calculate mean of 'column_2'. The column now mixes floats with a string, so
# the reduction cannot add the values together and raises. That error is the
# point of this exercise, so it is caught and shown instead of aborting the cell.
try:
    mean_column_2 = df1['column_2'].mean()
except (TypeError, ValueError) as error:
    mean_column_2 = None
    print("Could not compute the mean of column 'column_2' after replacement:", error)
    print("Reason: the column holds a string alongside floats, so it is no longer numeric.")
else:
    # Print mean of 'column_2'
    print("Mean of column 'column_2' after replacement:", mean_column_2)

# For reference: mean of the values that are still numeric. The string is
# coerced to NaN, which mean() skips.
numeric_mean_column_2 = pd.to_numeric(df1['column_2'], errors='coerce').mean()
print("Mean of the remaining numeric values in 'column_2':", numeric_mean_column_2)


In [None]:
#Q6
'''
In pandas, window functions (also known as windowing functions; rolling functions are the most common kind) are powerful tools for performing calculations across a specified subset (window) of data points.
These functions operate on a window of data defined by some criterion such as a fixed number of rows or a time period: .rolling() slides a fixed-size window over the data, .expanding() grows the window from the first row to the current one, and .ewm() applies exponentially decaying weights.
Window functions are particularly useful for tasks like calculating moving averages, cumulative sums, rank order statistics, and more.
'''


In [None]:
#Q7
import pandas as pd

# Take a single snapshot of "now" so month and year come from the same instant
current_datetime = pd.Timestamp.now()

# Year is read directly as an attribute; '%B' formats the full month name
current_year = current_datetime.year
current_month = current_datetime.strftime('%B')

# Report the month first, then the year
print(f"Current month: {current_month}", f"Current year: {current_year}", sep="\n")


In [None]:
#Q8
import pandas as pd

# Function to calculate difference between two dates
def calculate_date_difference(date1, date2):
    """Return the time elapsed from ``date1`` to ``date2`` as (days, hours, minutes).

    Parameters
    ----------
    date1, date2 : str or datetime-like
        Anything ``pd.Timestamp`` accepts, e.g. ``'2024-01-31'`` or
        ``'2024-01-31 14:30'``.

    Returns
    -------
    tuple of int
        ``(days, hours, minutes)``. Hours are in 0-23 and minutes in 0-59;
        leftover seconds are dropped. When ``date2`` is earlier than ``date1``
        every non-zero part is negative, so the three parts always share one sign.

    Raises
    ------
    ValueError
        If either value cannot be parsed, or parses to a missing value (NaT),
        for example an empty string.
    """
    # Convert inputs to pandas Timestamp objects
    timestamp1 = pd.Timestamp(date1)
    timestamp2 = pd.Timestamp(date2)

    # NaT would otherwise flow through the arithmetic and come back as NaN parts
    if pd.isna(timestamp1) or pd.isna(timestamp2):
        raise ValueError("Both dates must be valid, non-empty date/time values.")

    # Calculate timedelta between timestamps
    delta = timestamp2 - timestamp1

    # A negative Timedelta is stored normalised as "negative days + positive
    # seconds" (-25 hours is -2 days + 23 hours). Splitting the magnitude and
    # re-applying the sign avoids returning such mixed-sign parts.
    sign = -1 if delta < pd.Timedelta(0) else 1
    magnitude = abs(delta)

    days_diff = sign * magnitude.days
    hours_diff = sign * (magnitude.seconds // 3600)
    minutes_diff = sign * ((magnitude.seconds % 3600) // 60)

    return days_diff, hours_diff, minutes_diff

# Prompt user for input
date1 = input("Enter the first date (YYYY-MM-DD): ")
date2 = input("Enter the second date (YYYY-MM-DD): ")

# Calculate the difference. Text that pandas cannot parse as a date raises
# ValueError (TypeError when a timezone-aware value is mixed with a naive one);
# report it as a message instead of a traceback.
try:
    days, hours, minutes = calculate_date_difference(date1, date2)
except (ValueError, TypeError) as error:
    print(f"Error: could not compute the date difference: {error}")
else:
    # Display the result
    print(f"\nDifference between {date1} and {date2}:")
    print(f"Days: {days}")
    print(f"Hours: {hours}")
    print(f"Minutes: {minutes}")


In [None]:
#Q9
import pandas as pd

# Function to read CSV, convert column to categorical, and sort
def process_csv(file_path, column_name, category_order=None):
    """Read a CSV, make one column categorical, and return the rows sorted by it.

    Parameters
    ----------
    file_path : str or path-like
        Location of the CSV file, passed straight to ``pd.read_csv``.
    column_name : str
        Column to convert to the pandas ``category`` dtype.
    category_order : sequence, optional
        Desired category order, lowest first. When given (and non-empty) the
        column becomes an *ordered* categorical with exactly these categories;
        values not listed here become NaN. When omitted or empty, the
        categories are inferred from the data.

    Returns
    -------
    pandas.DataFrame
        A new frame sorted by ``column_name`` according to category order.

    Raises
    ------
    FileNotFoundError
        If ``file_path`` does not exist.
    KeyError
        If ``column_name`` is not a column of the CSV.
    """
    # Read CSV into DataFrame
    df = pd.read_csv(file_path)

    # Pick the target dtype: an explicit ordered categorical when an order was
    # supplied, otherwise let pandas infer the categories. len() is used rather
    # than truthiness so array-like orders work too.
    if category_order is not None and len(category_order) > 0:
        target_dtype = pd.CategoricalDtype(categories=list(category_order), ordered=True)
    else:
        target_dtype = 'category'

    # Convert specified column to categorical in a single step
    df[column_name] = df[column_name].astype(target_dtype)

    # Sort DataFrame based on categorical column (sorting follows category
    # order, not alphabetical order of the raw values)
    sorted_df = df.sort_values(by=column_name)

    return sorted_df

# Prompt user for input
file_path = input("Enter the file path to the CSV file: ")
column_name = input("Enter the column name to convert to categorical: ")
raw_order = input("Enter the category order (comma-separated, optional): ")

# Split the order into clean entries. A plain "".split(',') would give [''],
# which is truthy and would be applied as a one-category order (turning every
# value into NaN), so blank entries are dropped. Stripping lets "low, high"
# match the values "low" and "high".
category_order = [entry.strip() for entry in raw_order.split(',') if entry.strip()]

# Process CSV file
try:
    sorted_data = process_csv(file_path, column_name, category_order)

    # Display sorted data
    print("\nSorted Data:")
    print(sorted_data)

except FileNotFoundError:
    print(f"Error: File '{file_path}' not found.")
except KeyError:
    print(f"Error: Column '{column_name}' not found in the CSV file.")
except Exception as e:
    # Last-resort handler so an unexpected failure is reported, not silently lost
    print(f"An error occurred: {str(e)}")



In [None]:
#10
import pandas as pd
import matplotlib.pyplot as plt

# Function to read CSV and plot stacked bar chart
def plot_sales_data(file_path):
    """Plot a stacked bar chart of sales per date, one bar segment per product.

    Parameters
    ----------
    file_path : str or path-like
        CSV file that must contain the columns 'Date', 'Product' and 'Sales'.

    Raises
    ------
    FileNotFoundError
        If ``file_path`` does not exist.
    KeyError
        If any of the required columns is missing from the CSV.
    """
    # Read CSV into DataFrame
    df = pd.read_csv(file_path)

    # Fail early with a message that names every missing column, rather than
    # a bare KeyError for whichever column is touched first
    required_columns = ('Date', 'Product', 'Sales')
    missing = [name for name in required_columns if name not in df.columns]
    if missing:
        raise KeyError(f"CSV is missing required column(s): {missing}")

    # Convert 'Date' column to datetime format
    df['Date'] = pd.to_datetime(df['Date'])

    # Group by 'Date' and 'Product', summing up 'Sales'; date/product
    # combinations with no rows are filled with 0 so every bar has all segments
    sales_data = df.groupby(['Date', 'Product'])['Sales'].sum().unstack(fill_value=0)

    # Bar plots label each bar with the text form of its index value, which for
    # datetimes includes "00:00:00". When every entry is a whole day, show the
    # date only; intraday timestamps are left untouched so bars stay distinguishable.
    if isinstance(sales_data.index, pd.DatetimeIndex) and len(sales_data.index) > 0:
        if (sales_data.index == sales_data.index.normalize()).all():
            sales_data.index = sales_data.index.strftime('%Y-%m-%d')

    # Plot stacked bar chart and keep the Axes so it is customised explicitly
    ax = sales_data.plot(kind='bar', stacked=True, figsize=(10, 6))

    # Customize the plot
    ax.set_title('Stacked Bar Chart of Sales Data')
    ax.set_xlabel('Date')
    ax.set_ylabel('Sales')
    ax.legend(title='Product')
    ax.grid(True)

    # Show the plot
    plt.show()

# Ask which CSV file holds the sales records
file_path = input("Enter the file path to the CSV file: ")

# Draw the chart; any failure is reported as a one-line message.
# A missing file gets its own wording, everything else the generic one.
try:
    plot_sales_data(file_path)
except Exception as exc:
    if isinstance(exc, FileNotFoundError):
        message = f"Error: File '{file_path}' not found."
    else:
        message = f"An error occurred: {exc}"
    print(message)


In [None]:
#11
