In [None]:
'''
1. Write a Python program to find the Minkowski distance between two points.

2. Write a Python NumPy program to compute the weighted average along the specified axis 
of a given flattened array. 
From Wikipedia: The weighted arithmetic mean is similar to an ordinary arithmetic mean 
(the most common type of average), except that instead of each of the data points 
contributing equally to the final average, some data points contribute more than others. The 
notion of weighted mean plays a role in descriptive statistics and also occurs in a more 
general form in several other areas of mathematics. 
    Sample output: 
    Original flattened array: 
     [[0 1 2] 
     [3 4 5] 
     [6 7 8]] 
    Weighted average along the specified axis of the above flattened array: 
    [1.2 4.2 7.2] 

3. Write a NumPy program to compute cross-correlation of two given arrays. 
    Sample Output: 
    Original array1: 
     [0 1 3] 
    Original array2: 
     [2 4 5] 
    Cross-correlation of the said arrays: 
     [[2.33333333 2.16666667] 
     [2.16666667 2.33333333]] 

4. Download any dataset from UCI (do not repeat it from set B). Read this csv file using 
read_csv() function. Describe the dataset using appropriate function. Display mean value 
of numeric attribute. Check any data values are missing or not. 

5. Download nursery dataset from UCI. Split dataset on any one categorical attribute. 
Compare the means of each split. (Use groupby) 

6. Create one dataframe with 5 subjects and marks of 10 students for each subject. Find 
arithmetic mean, geometric mean, and harmonic mean. 

7. Download any csv file of your choice and display details about data using pandas profiling. 
Show stats in HTML form. 
'''

In [None]:
import numpy as np
import pandas as pd
from scipy.stats import gmean, hmean   # For geometric & harmonic mean

In [None]:
'''
Write a Python program to find the Minkowski distance between two points.
'''

def minkowski(u, v, p):
    """Return the Minkowski distance of order ``p`` between two points.

    The distance is ``(sum(|u_i - v_i| ** p)) ** (1 / p)``. ``p=1`` gives the
    Manhattan distance, ``p=2`` the Euclidean distance and ``p=np.inf`` the
    Chebyshev (maximum) distance.

    Parameters
    ----------
    u, v : array_like
        Coordinates of the two points; both must be 1-D and of equal length.
    p : float
        Order of the distance; must be strictly positive.

    Returns
    -------
    float
        The distance between ``u`` and ``v``.

    Raises
    ------
    ValueError
        If the points are not 1-D, differ in length, or ``p`` is not positive.
    """
    u = np.asarray(u, dtype=float)
    v = np.asarray(v, dtype=float)
    # np.linalg.norm switches to matrix-norm semantics for 2-D input, so only
    # genuine coordinate vectors are accepted here.
    if u.ndim != 1 or u.shape != v.shape:
        raise ValueError("points must be 1-D arrays of equal length")
    # `not p > 0` also rejects NaN, which a plain `p <= 0` test would let through.
    if not p > 0:
        raise ValueError("order p must be positive")
    return np.linalg.norm(u - v, ord=p)


point1 = np.array([1, 2])
point2 = np.array([4, 6])
# Order of the distance (example value).
p = 3
minkowski_distance = minkowski(point1, point2, p)
# The label is built from `p` so it stays correct when the order is changed.
print(f"Minkowski Distance (p={p}):", minkowski_distance)

In [None]:
'''Write a Python NumPy program to compute the weighted average along the specified axis 
of a given flattened array.
'''

# 3x3 matrix holding 0..8, one row per group to be averaged.
arr = np.arange(9).reshape(3, 3)
print("\nOriginal flattened array:\n", arr)
# One weight per column. np.average divides by the sum of the weights, so they
# do not have to add up to 1. These values reproduce the sample output given
# in the exercise statement ([1.2 4.2 7.2]).
weights = np.array([1 / 4, 2 / 4, 2 / 4])
# axis=1 averages across the columns of each row, giving one value per row.
weighted_avg = np.average(arr, axis=1, weights=weights)
print("Weighted average along axis=1:", weighted_avg)

In [None]:
'''
Write a NumPy program to compute cross-correlation of two given arrays.
'''

a1 = np.array([0, 1, 3])
a2 = np.array([2, 4, 5])
print("\nOriginal array1:", a1)
print("Original array2:", a2)
# The sample output in the exercise statement is the 2x2 covariance matrix of
# the two arrays (variances on the diagonal, covariance off the diagonal).
# np.corrcoef would instead return the normalised Pearson matrix, whose
# diagonal is always 1, and therefore cannot produce the expected values.
cross_corr = np.cov(a1, a2)
print("Cross-correlation of the said arrays:\n", cross_corr)

In [None]:
'''
Download any dataset from UCI (do not repeat it from set B). Read this csv file using 
read_csv() function. Describe the dataset using appropriate function. Display mean value 
of numeric attribute. Check any data values are missing or not.
'''
# NOTE: Replace 'winequality-red.csv' with actual downloaded CSV file path (Or download it from the CSV files folder)
# The copy distributed by UCI uses ';' as the field separator, while other
# copies of the same file are comma-separated. sep=None makes pandas detect the
# delimiter from the file itself (this requires the python parsing engine), so
# either variant is split into proper numeric columns instead of one text column.
wine = pd.read_csv("winequality-red.csv", sep=None, engine="python")

# Summary statistics for every column, numeric or not.
print("\nDataset description:\n", wine.describe(include="all"))
# Means are only defined for numeric columns, so restrict to those first.
print("\nMean values of numeric attributes:\n", wine.select_dtypes(include="number").mean())
# Count of missing entries per column; all zeros means nothing is missing.
print("\nMissing values:\n", wine.isnull().sum())

In [None]:
'''
Download nursery dataset from UCI. 
Split dataset on any one categorical attribute. 
Compare the means of each split. (Use groupby)
'''
nursery = pd.read_csv("nursery.csv")

# Example: split on 'parents' column
group_col = "parents"

# NOTE(review): the UCI nursery attributes are documented as categorical; if
# that holds for this CSV, a plain numeric mean per group has no columns to
# work on and comes back empty. One-hot encoding the remaining attributes turns
# each category into a 0/1 indicator, so the group mean of an indicator is the
# share of rows in that split having that category. Columns that are already
# numeric pass through get_dummies unchanged and are averaged as before.
encoded = pd.get_dummies(nursery.drop(columns=group_col), dtype=float)
group_means = encoded.groupby(nursery[group_col]).mean(numeric_only=True)
print("\nGrouped mean by 'parents' attribute:\n", group_means)


In [None]:
'''
Create one dataframe with 5 subjects and marks of 10 students for each subject. Find 
arithmetic mean, geometric mean, and harmonic mean.
'''

# Fixed seed so that re-running the notebook reproduces the same marks.
SEED = 42
rng = np.random.default_rng(SEED)

subjects = ["Math", "Science", "English", "History", "Computer"]
n_students = 10

# One column per subject, one row per student; marks are integers drawn from
# 40..99 (the upper bound 100 is exclusive).
marks = pd.DataFrame(
    {subject: rng.integers(40, 100, n_students) for subject in subjects}
)

print("\nMarks Dataframe:\n", marks)

# All three means are taken per subject, i.e. down the rows of each column.
arith_mean = marks.mean()
geo_mean = gmean(marks, axis=0)
harm_mean = hmean(marks, axis=0)

print("\nArithmetic mean:\n", arith_mean)
print("\nGeometric mean:\n", geo_mean)
print("\nHarmonic mean:\n", harm_mean)

In [None]:
'''
Download any csv file of your choice and display details about data using pandas profiling. 
Show stats in HTML form.
'''

from ydata_profiling import ProfileReport

# Load the CSV to be profiled; any other file can be substituted here.
iris = pd.read_csv("iris.csv")

# Build the profiling report for the loaded frame with explorative mode on.
profile = ProfileReport(
    iris,
    title="Iris Dataset Profiling Report",
    explorative=True,
)

# Write the report out as a standalone HTML file and confirm where it went.
profile.to_file("iris_profiling_report.html")
print("\nProfiling report saved as iris_profiling_report.html")