# 1.

In [None]:
# iloc selects by zero-based integer position, so position 1 is the second row of df.
# NOTE(review): df is not defined in this cell; per the exercise it presumably holds
# the course "Machine Learning" with a duration of 3 in that row — confirm.
second_row = df.iloc[1]
print(second_row)


# 2.

loc and iloc are indexers in Pandas used to select or slice rows and columns of a data set. .loc selects by label (index and column names), while .iloc selects by zero-based integer position.

# 3.

In [2]:
import pandas as pd

# Small demo frame; its default index is the integer range 0..3
data = {'A': [10, 20, 30, 40], 'B': [50, 60, 70, 80]}
df = pd.DataFrame(data)

# New row order, expressed as existing index labels
reindex = [3, 0, 1, 2]

# Rows are rearranged to follow the label order above; labels travel with their rows
new_df = df.reindex(reindex)

# Label-based lookup: the row whose index label is 2
output_loc = new_df.loc[2]

# Position-based lookup: the row sitting at integer position 2
output_iloc = new_df.iloc[2]

# Show both lookups, each under its own heading
for heading, row in (
    ("Output of new_df.loc[2]:", output_loc),
    ("\nOutput of new_df.iloc[2]:", output_iloc),
):
    print(heading)
    print(row)


Output of new_df.loc[2]:
A    30
B    70
Name: 2, dtype: int64

Output of new_df.iloc[2]:
A    20
B    60
Name: 1, dtype: int64


So, there is a difference in the outputs:

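new_df.loc[2] is label-based: it returns the row whose index label is 2 (A=30, B=70), which is now the fourth (last) row of the reindexed DataFrame.

new_df.iloc[2] is position-based: it returns the row at integer position 2, i.e. the third row of the reindexed DataFrame, whose index label is 1 (A=20, B=60).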

# 4.

In [3]:
import pandas as pd
import numpy as np

# Labels for the 6x6 frame: six named columns and a 1-based integer index
columns = ['column_1', 'column_2', 'column_3', 'column_4', 'column_5', 'column_6']
indices = [1, 2, 3, 4, 5, 6]

# Fill the frame with random values drawn by NumPy
random_values = np.random.rand(6, 6)
df1 = pd.DataFrame(random_values, index=indices, columns=columns)

# (i) Mean of every column
column_means = df1.mean()

# (ii) Standard deviation of 'column_2' only
column_2 = df1['column_2']
std_dev_column_2 = column_2.std()

# Report both results, each under its own heading
for heading, value in (
    ("Mean of each column:", column_means),
    ("\nStandard deviation of 'column_2':", std_dev_column_2),
):
    print(heading)
    print(value)


Mean of each column:
column_1    0.510602
column_2    0.252186
column_3    0.467463
column_4    0.506310
column_5    0.568161
column_6    0.281511
dtype: float64

Standard deviation of 'column_2':
0.2087343029938684


# 5.

In [None]:
import pandas as pd
import numpy as np

# Labels for the demo frame: six named columns and the index values 1..6
columns = ['column_1', 'column_2', 'column_3', 'column_4', 'column_5', 'column_6']
indices = [1, 2, 3, 4, 5, 6]

# Creating a 6x6 DataFrame with random numeric data from np.random.rand
df1 = pd.DataFrame(np.random.rand(6, 6), columns=columns, index=indices)

# Overwrite the cell at index label 2 of 'column_2' with a string.
# NOTE(review): whether this line itself raises depends on the pandas version —
# older releases appear to accept it and upcast the column to object dtype
# (with a FutureWarning from 2.1 on), while stricter releases may reject it.
# Confirm against the installed version.
df1.loc[2, 'column_2'] = "string_value"

# Calculate the mean of 'column_2'. If the assignment above succeeded, the
# column now mixes floats with a string, so this call is expected to fail
# (presumably with a TypeError — verify).
mean_column_2 = df1['column_2'].mean()

# Only reached if neither statement above raised
print("Mean of 'column_2':")
print(mean_column_2)


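When you execute this code, it fails, but not with a ValueError at the assignment. In pandas versions before 3.0, the assignment of a string into the numeric 'column_2' succeeds: pandas upcasts the column to the object dtype (pandas 2.1 and later also emit a FutureWarning about setting an incompatible dtype). The error is then raised on the next line, where mean() is called on a column that mixes floats with a string; this raises a TypeError because the string cannot be treated as a number. pandas 3.0 and later are expected to reject the incompatible assignment itself with a TypeError, so there the error occurs at the line where the numeric value is replaced with a string.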

# 6.

Window functions are commonly used for time-series data or when you need to calculate values over a rolling or expanding window. These functions are typically used with methods like rolling(), expanding(), and ewm() (exponential moving window) in pandas.

Rolling Windows Functions:

rolling() is used to create a rolling view of a Series or DataFrame, which allows you to calculate statistics (e.g., mean, sum, standard deviation) over a specified rolling window of data points. Common rolling window functions include mean(), sum(), std(), min(), max(), and var().
Expanding Windows Functions:

expanding() is used to create an expanding window view, which calculates the cumulative statistics for the entire dataset up to the current point. Common expanding window functions include mean(), sum(), std(), min(), max(), and var().
Exponential Moving Windows Functions (ewm):

The ewm() function is used for exponential moving windows, which assigns exponentially decreasing weights to data points. It's commonly used for smoothing data or calculating exponentially weighted statistics. Functions like mean(), var(), std(), and corr() can be applied using ewm().

# 7.

In [5]:
import pandas as pd
from datetime import datetime

# Snapshot the local clock once so month and year come from the same instant
current_datetime = datetime.now()
current_month, current_year = current_datetime.month, current_datetime.year

# Build a pandas Timestamp anchored on the first day of the current month
current_date = pd.to_datetime("{}-{}-01".format(current_year, current_month))

# Report the full month name and the year
month_name = current_date.strftime("%B")
print("Current Month:", month_name)
print("Current Year:", current_date.year)


Current Month: November
Current Year: 2023


# 8.

In [6]:
import pandas as pd

def format_time_difference(date_str1, date_str2):
    """Return a readable description of the time between two date strings.

    The difference is computed as ``date_str2 - date_str1`` and reported as
    whole days, hours and minutes. When the second date is earlier than the
    first, the magnitude is reported and a note is appended; decomposing a
    negative Timedelta directly would give misleading parts such as
    "-1 days, 12 hours" for a gap of twelve hours.

    Args:
        date_str1: First date, e.g. "2023-11-04" (a time part is allowed).
        date_str2: Second date, in the same format.

    Returns:
        A string such as "Time difference: 1 days, 0 hours, 0 minutes".

    Raises:
        ValueError: If either input is empty or cannot be parsed as a date.
    """
    date1 = pd.to_datetime(date_str1)
    date2 = pd.to_datetime(date_str2)

    # Empty input parses to NaT instead of raising; treat it as invalid so
    # the caller does not end up printing "nan days".
    if pd.isna(date1) or pd.isna(date2):
        raise ValueError("Both dates must be provided.")

    time_difference = date2 - date1
    second_is_earlier = time_difference < pd.Timedelta(0)

    # Split the absolute difference so every component is non-negative
    magnitude = abs(time_difference)
    days = magnitude.days
    hours, remainder = divmod(magnitude.seconds, 3600)
    minutes, _ = divmod(remainder, 60)

    message = f"Time difference: {days} days, {hours} hours, {minutes} minutes"
    if second_is_earlier:
        message += " (second date is earlier than the first)"
    return message


if __name__ == "__main__":
    # Input two dates from the user in the format YYYY-MM-DD
    date_str1 = input("Enter the first date (YYYY-MM-DD): ")
    date_str2 = input("Enter the second date (YYYY-MM-DD): ")

    try:
        # Display the result
        print(format_time_difference(date_str1, date_str2))

    except ValueError:
        print("Invalid date format. Please use the format YYYY-MM-DD.")

    except Exception as e:
        print(f"An error occurred: {str(e)}")


Enter the first date (YYYY-MM-DD): 2023-11-04
Enter the second date (YYYY-MM-DD): 2023-11-05
Time difference: 1 days, 0 hours, 0 minutes


# 9.

In [None]:
import pandas as pd

def parse_category_order(raw_order):
    """Turn a comma-separated string into a clean list of category labels.

    Whitespace around each label is stripped and empty entries are dropped,
    so "low, medium, high" yields ['low', 'medium', 'high']. Without the
    stripping, a label such as ' medium' would not match the CSV values and
    those rows would silently become NaN.

    Args:
        raw_order: Text as typed by the user, e.g. "a, b, c".

    Returns:
        The labels in the order given.
    """
    labels = (label.strip() for label in raw_order.split(','))
    return [label for label in labels if label]


def sort_csv_by_category(file_path, column_name, category_order):
    """Read a CSV file and sort it by an ordered categorical column.

    Args:
        file_path: Path of the CSV file to read.
        column_name: Column to convert to an ordered categorical.
        category_order: Category labels from lowest to highest. Values in the
            column that are not listed here become NaN and sort last.

    Returns:
        The DataFrame sorted by ``column_name`` in the given category order.

    Raises:
        FileNotFoundError: If ``file_path`` does not exist.
        KeyError: If ``column_name`` is not a column of the file.
    """
    df = pd.read_csv(file_path)

    # Convert the specified column to a categorical data type with the specified order
    df[column_name] = pd.Categorical(df[column_name], categories=category_order, ordered=True)

    # Sort the DataFrame by the categorical column
    return df.sort_values(column_name)


if __name__ == "__main__":
    # Prompt the user for the file path
    file_path = input("Enter the CSV file path: ")

    # Prompt the user for the column name to convert to categorical data type
    column_name = input("Enter the column name to convert to categorical data type: ")

    # Prompt the user for the category order (comma-separated)
    category_order = parse_category_order(
        input("Enter the category order (comma-separated, e.g., category1,category2,category3): ")
    )

    try:
        df = sort_csv_by_category(file_path, column_name, category_order)

        # Display the sorted data
        print(df)

    except FileNotFoundError:
        print(f"File not found at the specified path: {file_path}")

    except KeyError:
        print(f"Column not found: {column_name}")

    except Exception as e:
        print(f"An error occurred: {str(e)}")


# 10.

In [8]:
pip install pandas matplotlib


Note: you may need to restart the kernel to use updated packages.


In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Function to read CSV file and create a stacked bar chart
def create_stacked_bar_chart(file_path, date_col='Date', product_col='Product',
                             value_col='Sales', aggfunc='mean'):
    """Read a CSV file and show a stacked bar chart of its values.

    The data is pivoted so that each distinct ``date_col`` value becomes one
    bar and each distinct ``product_col`` value becomes one stacked segment.

    Args:
        file_path: Path of the CSV file to read.
        date_col: Column used for the x-axis groups (default 'Date').
        product_col: Column whose values become the stacked series
            (default 'Product').
        value_col: Numeric column that is plotted (default 'Sales').
        aggfunc: How rows sharing the same date/product pair are combined.
            The default 'mean' matches pandas' pivot_table default; pass
            'sum' to plot totals instead.

    Raises:
        FileNotFoundError: If ``file_path`` does not exist.
        KeyError: If any of the three columns is missing from the file.
    """
    # Read the CSV file into a DataFrame
    df = pd.read_csv(file_path)

    # Fail early with a message naming every missing column, instead of the
    # single bare key that pivot_table would report.
    missing = [col for col in (date_col, product_col, value_col) if col not in df.columns]
    if missing:
        raise KeyError(f"Missing column(s) in {file_path}: {', '.join(missing)}")

    # Date/product combinations that do not occur in the file are drawn as 0
    pivot_data = df.pivot_table(
        index=date_col,
        columns=product_col,
        values=value_col,
        aggfunc=aggfunc,
        fill_value=0,
    )

    # Create a stacked bar chart
    pivot_data.plot(kind='bar', stacked=True, figsize=(10, 6))

    # Customize the chart labels and title
    plt.xlabel(date_col)
    plt.ylabel(value_col)
    plt.title(f'Stacked Bar Chart of {product_col} {value_col} Over Time')

    # Show the chart
    plt.legend(title=product_col)
    plt.show()

if __name__ == "__main__":
    # Ask where the CSV lives, then try to draw the chart from it
    file_path = input("Enter the file path of the CSV file: ")

    try:
        create_stacked_bar_chart(file_path)
    except Exception as err:
        # A missing file gets its own message; anything else is reported generically
        if isinstance(err, FileNotFoundError):
            print(f"File not found: {file_path}")
        else:
            print(f"An error occurred: {err}")
