In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Lift pandas' display limits so a rendered DataFrame shows every row and
# every column instead of a truncated preview.
for display_option in ('display.max_rows', 'display.max_columns'):
    pd.set_option(display_option, None)

# Read the dataset CSV from its location on the local drive.
csv_path = 'D:/Materi Kuliah UNAIR/Semester 5/Pembelajaran Mesin (Praktikum) RK-A2/modified_auto_mpg.csv'
df = pd.read_csv(csv_path)

# Last expression of the cell: render the loaded DataFrame.
df

In [None]:
# Parse 'horsepower' as numbers; any entry that cannot be parsed becomes NaN
# (errors='coerce') instead of raising.
horsepower_numeric = pd.to_numeric(df['horsepower'], errors='coerce')
df['horsepower'] = horsepower_numeric

# Render the DataFrame with the converted column.
df

In [None]:
# Summarise attributes 1-8 (the first eight columns): mean, max, min and mode,
# plus the number of missing values per attribute.
attributes = df.iloc[:, :8]

summary_stats = {
    "Mean": attributes.mean(),
    "Max": attributes.max(),
    "Min": attributes.min(),
    # mode() returns one row per tied mode; keep only the first row.
    "Mode": attributes.mode().iloc[0],
}

# One row per attribute, one column per statistic.
summary_stats_df = pd.DataFrame(summary_stats)

# Count NaN entries per attribute and attach them as an extra column
# (assignment aligns on the attribute-name index).
missing_values = attributes.isna().sum()
summary_stats_df['Missing Value/NaN'] = missing_values

# Display the summary table in the console
print(summary_stats_df)

# Save the summary to CSV. The attribute names live in the index, so the index
# must be written (labelled 'Attribute'); writing with index=False would leave
# every row of the file unlabeled.
summary_stats_df.to_csv('summary_statistics.csv', index_label='Attribute')

# Impute missing values column by column:
#   - 'mpg'        -> mean of the column
#   - 'horsepower' -> mode of the column (first one if several values tie)
mpg_mean = df['mpg'].mean()
horsepower_mode = df['horsepower'].mode()[0]

fill_values = {'mpg': mpg_mean, 'horsepower': horsepower_mode}
for column_name, fill_value in fill_values.items():
    df[column_name] = df[column_name].fillna(fill_value)

# Recount NaNs across every column to confirm the imputation took effect.
missing_values_after = df.isna().sum()

# Report the updated per-column missing-value counts.
print("\nMissing values after handling:", missing_values_after, sep="\n")

In [None]:
def calculate_outliers(series, whisker=1.5):
    """Count the values in ``series`` that fall outside the IQR fences.

    The fences are ``Q1 - whisker * IQR`` and ``Q3 + whisker * IQR``, where
    Q1/Q3 are the 25th/75th percentiles and ``IQR = Q3 - Q1``.

    Parameters
    ----------
    series : pandas.Series
        Numeric values to inspect.
    whisker : float, default 1.5
        Multiplier applied to the IQR when building the fences. The default
        reproduces the previously hard-coded 1.5 rule.

    Returns
    -------
    int
        Number of values strictly below the lower fence or strictly above the
        upper fence. NaN entries are never counted, because comparisons
        against NaN evaluate to False.
    """
    q1, q3 = series.quantile([0.25, 0.75])
    iqr = q3 - q1
    lower_bound = q1 - whisker * iqr
    upper_bound = q3 + whisker * iqr

    is_outlier = (series < lower_bound) | (series > upper_bound)
    # Cast so callers get a plain Python int, as len() returned before.
    return int(is_outlier.sum())

# Count IQR outliers for each of attributes 1-8 (the first eight columns).
outliers_count = df.iloc[:, :8].apply(calculate_outliers)

# Turn the per-attribute counts into a one-column DataFrame for display/saving.
outliers_count_df = outliers_count.to_frame(name='Outliers Count')

# Display the counts in the console
print(outliers_count_df)

# Save the counts to CSV. The attribute names are the index, so the index must
# be written (labelled 'Attribute'); with index=False the file would contain
# only a bare column of numbers with no way to tell which attribute each is.
outliers_count_df.to_csv('outliers_count.csv', index_label='Attribute')

# Render the (imputed) DataFrame as the cell output.
df