In [9]:
import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt 
import seaborn as sns 
 
def load_dataset(file_path):
    """Read a CSV file into a DataFrame.

    Parameters
    ----------
    file_path : path or file-like
        Passed straight through to ``pd.read_csv``.

    Returns
    -------
    pandas.DataFrame
        The parsed contents of the CSV.
    """
    frame = pd.read_csv(file_path)
    return frame

def numerical_stats(df, column):
    """Print summary statistics for one column and return its non-null values.

    Missing values are dropped first. The printed summary covers the mean,
    median, first mode, sample standard deviation and sample variance
    (both with ``ddof=1``), and the range (max minus min).

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the column.
    column : str
        Label of the column to summarise.

    Returns
    -------
    pandas.Series
        The column with missing values removed.
    """
    values = df[column].dropna()

    # mode() can return several values; report the first, or None if there is none.
    modes = values.mode()
    most_common = None if modes.empty else modes[0]

    average = np.mean(values)
    middle = np.median(values)
    spread_std = np.std(values, ddof=1)
    spread_var = np.var(values, ddof=1)
    span = np.max(values) - np.min(values)

    print(f"Statistics for {column}:")
    print(f"Mean: {average}")
    print(f"Median: {middle}")
    print(f"Mode: {most_common}")
    print(f"Standard Deviation: {spread_std}")
    print(f"Variance: {spread_var}")
    print(f"Range: {span}\n")

    return values

def detect_outliers(data, factor=1.5):
    """Find outliers with the IQR fence rule, print them, and return them.

    A value is an outlier when it lies below ``Q1 - factor * IQR`` or above
    ``Q3 + factor * IQR``, where Q1/Q3 are the 25th/75th percentiles.

    Parameters
    ----------
    data : array-like or pandas.Series
        Numeric observations. Missing values are ignored.
    factor : float, default 1.5
        Multiplier applied to the IQR when building the fences.

    Returns
    -------
    pandas.Series
        The outlying values (empty when there are none or when ``data`` has
        no non-missing values).
    """
    # Coerce to a Series so plain lists/arrays work, and drop NaNs so they
    # cannot turn the percentiles into NaN.
    values = pd.Series(data).dropna()

    if values.empty:
        # Percentiles are undefined for empty input; report no outliers.
        outliers = values
    else:
        q1, q3 = np.percentile(values, [25, 75])
        iqr = q3 - q1
        lower_bound = q1 - factor * iqr
        upper_bound = q3 + factor * iqr
        outliers = values[(values < lower_bound) | (values > upper_bound)]

    print(f"Outliers: {outliers.tolist()}\n")
    return outliers

def plot_numerical_distribution(data, column):
    """Draw a histogram (with KDE curve) and a boxplot of ``data`` side by side.

    Parameters
    ----------
    data : array-like
        Values to plot.
    column : str
        Name used in the two panel titles.
    """
    fig, (hist_ax, box_ax) = plt.subplots(1, 2, figsize=(12, 5))

    # Left panel: 20-bin histogram with a kernel density estimate overlaid.
    sns.histplot(data, bins=20, kde=True, ax=hist_ax)
    hist_ax.set_title(f'Histogram of {column}')

    # Right panel: horizontal boxplot of the same values.
    sns.boxplot(x=data, ax=box_ax)
    box_ax.set_title(f'Boxplot of {column}')

    plt.show()

def categorical_frequency(df, column):
    """Count how often each category occurs in a column, print and return the counts.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the column.
    column : str
        Label of the categorical column.

    Returns
    -------
    pandas.Series
        Result of ``value_counts()`` on the column.
    """
    selected = df[column]
    counts = selected.value_counts()

    print(f"Frequency of categories in {column}:")
    print(counts, "\n")

    return counts

def plot_categorical_distribution(category_counts, column):
    """Draw a bar chart and a pie chart of category counts side by side.

    Parameters
    ----------
    category_counts : pandas.Series
        Counts per category, e.g. the result of ``value_counts()``.
    column : str
        Name used in the two panel titles.
    """
    fig, (bar_ax, pie_ax) = plt.subplots(1, 2, figsize=(12, 5))

    # Left panel: one bar per category.
    category_counts.plot.bar(color='skyblue', ax=bar_ax)
    bar_ax.set_title(f'Bar Chart of {column}')

    # Right panel: share of each category, labelled with one-decimal percentages.
    category_counts.plot.pie(autopct='%1.1f%%', ax=pie_ax)
    pie_ax.set_title(f'Pie Chart of {column}')

    plt.show()

def main():
    """Interactively analyse one numerical and one categorical column of a CSV.

    Prompts for the file path and the two column names, then prints the
    statistics, outliers and category frequencies and shows the matching plots.
    """
    df = load_dataset(input("Enter dataset file path: "))
    print("Columns in dataset:", df.columns.tolist())

    # Numerical column: summary statistics, outliers, histogram and boxplot.
    numeric_name = input("Enter numerical column for analysis: ")
    numeric_values = numerical_stats(df, numeric_name)
    detect_outliers(numeric_values)
    plot_numerical_distribution(numeric_values, numeric_name)

    # Categorical column: frequency table, bar chart and pie chart.
    category_name = input("Enter categorical column for frequency analysis: ")
    frequencies = categorical_frequency(df, category_name)
    plot_categorical_distribution(frequencies, category_name)

# Entry-point guard: the dunder names need double underscores on both sides;
# the single-underscore form raises NameError before main() can run.
if __name__ == "__main__":
    main()

ModuleNotFoundError: No module named 'pandas'

In [10]:
import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt 
import seaborn as sns


# Column labels applied to the CSV; with header=0 the file's own header row is replaced by these names.
column_names = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'species']

df = pd.read_csv("iris.csv", names=column_names, header=0)

print("Dataset Preview:")
print(df.head())

# The first two columns of the frame are the pair compared below.
x_column, y_column = df.columns[:2]

# Scatter plot of the chosen pair.
scatter_fig, scatter_ax = plt.subplots(figsize=(8, 6))
sns.scatterplot(x=df[x_column], y=df[y_column], ax=scatter_ax)
scatter_ax.set_title(f'Scatter Plot: {x_column} vs {y_column}')
scatter_ax.set_xlabel(x_column)
scatter_ax.set_ylabel(y_column)
plt.show()

# Pearson correlation: off-diagonal entry of the 2x2 correlation matrix.
correlation = np.corrcoef(df[x_column], df[y_column])[0, 1]
print(f'Pearson Correlation Coefficient between {x_column} and {y_column}: {correlation:.2f}')

# Covariance and correlation over the numeric columns only.
df_numeric = df.select_dtypes(include=[np.number])

cov_matrix = df_numeric.cov()
print("\nCovariance Matrix:")
print(cov_matrix)

corr_matrix = df_numeric.corr()
print("\nCorrelation Matrix:")
print(corr_matrix)

# Annotated heatmap of the correlation matrix.
heatmap_fig, heatmap_ax = plt.subplots(figsize=(8, 6))
sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', fmt=".2f", linewidths=0.5, ax=heatmap_ax)
heatmap_ax.set_title('Correlation Matrix Heatmap')
plt.show()

ModuleNotFoundError: No module named 'pandas'

In [11]:
!pip install pandas numpy matplotlib seaborn




In [12]:
import pandas as pd 
from sklearn.preprocessing import StandardScaler 
from sklearn.decomposition import PCA 
import seaborn as sns 
import matplotlib.pyplot as plt  

df = pd.read_csv("iris.csv")

# Remove the row-identifier column when the file has one.
if 'Id' in df.columns:
    df = df.drop(columns=['Id'])

# Feature matrix: every column except the class label.
X = df.drop(columns=['variety'])

# Standardise the features before PCA.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Project onto the first two principal components.
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X_scaled)

# Attach the class label to the projected points for colouring.
pca_df = pd.DataFrame(X_pca, columns=['PC1', 'PC2']).assign(variety=df['variety'])

pca_fig, pca_ax = plt.subplots(figsize=(8, 6))
sns.scatterplot(
    data=pca_df,
    x='PC1',
    y='PC2',
    hue='variety',
    palette='viridis',
    s=100,
    edgecolor='k',
    ax=pca_ax,
)
pca_ax.set_title("PCA Visualization of Iris Dataset")
pca_ax.set_xlabel("Principal Component 1")
pca_ax.set_ylabel("Principal Component 2")
pca_ax.legend()
plt.show()

print("Explained Variance Ratio:", pca.explained_variance_ratio_)

ModuleNotFoundError: No module named 'pandas'

In [None]:
import numpy as np
import pandas as pd 
from sklearn.model_selection import train_test_split 
from sklearn.preprocessing import StandardScaler 
from sklearn.neighbors import KNeighborsClassifier 
from sklearn.metrics import accuracy_score, f1_score 

# Load data
df = pd.read_csv('iris.csv')

# Drop the row-identifier column if the file has one, so it is not treated as a
# feature by the positional slice below.
if 'Id' in df.columns:
    df = df.drop(columns=['Id'])

# Features and target: every column but the last is a feature, the last is the label
X = df.iloc[:, :-1].values
y = df.iloc[:, -1].values

# Split into train and test (80/20, stratified on the label, fixed seed)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Standardize features: fit the scaler on the training split only, then apply
# the same transform to the test split
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# k-NN evaluation function
def evaluate_knn(k_values, weighted=False):
    """Train and score one k-NN classifier per value of k.

    Reads the module-level ``X_train``, ``y_train``, ``X_test`` and ``y_test``.

    Parameters
    ----------
    k_values : iterable of int
        Neighbour counts to evaluate.
    weighted : bool, default False
        When True neighbours are weighted by distance, otherwise uniformly.

    Returns
    -------
    pandas.DataFrame
        One row per k with columns ``k``, ``Accuracy`` and ``F1-score``
        (F1 uses the ``weighted`` average).
    """
    weighting = 'distance' if weighted else 'uniform'

    def score(k):
        # Fit on the training split and predict the held-out split.
        classifier = KNeighborsClassifier(n_neighbors=k, weights=weighting)
        classifier.fit(X_train, y_train)
        predictions = classifier.predict(X_test)
        return (
            k,
            accuracy_score(y_test, predictions),
            f1_score(y_test, predictions, average='weighted'),
        )

    rows = [score(k) for k in k_values]
    return pd.DataFrame(rows, columns=['k', 'Accuracy', 'F1-score'])

# Score uniform and distance-weighted k-NN over the same neighbour counts
k_values = [1, 3, 5]
regular_knn_results = evaluate_knn(k_values, weighted=False)
weighted_knn_results = evaluate_knn(k_values, weighted=True)

# Report both result tables, each under its heading
print("Regular k-NN Results:", regular_knn_results, sep="\n")
print("\nWeighted k-NN Results:", weighted_knn_results, sep="\n")