**Morfologiska Egenskaper hos Iris-blommor**

In [None]:
import os
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from statsmodels.formula.api import ols 
import scipy.stats as scs
import numpy as np

# Load the Iris dataset.
# The default location is the original absolute Windows path, which only
# exists on one machine. Setting the IRIS_CSV environment variable lets the
# script run elsewhere without editing this file; when it is unset the
# behaviour is exactly as before.
DEFAULT_IRIS_CSV = "C:\\Users\\NedyaIbrahim-AI23GBG\\Documents\\Github\\Statistical_methods\\Labb\\data\\iris.csv"
file_path = os.path.realpath(os.environ.get("IRIS_CSV", DEFAULT_IRIS_CSV))
iris = pd.read_csv(file_path)

"""Displaying Basic Information of the Dataset"""

# Preview the first five rows of the dataset
iris.head(5)

# Report, per column, whether any value is missing
iris.isna().any()

# Trim leading/trailing whitespace from every column name
iris.columns = iris.columns.map(str.strip)

"""Statistical Summary for Each Class"""

# Split the rows by class and show descriptive statistics for each group
grouped_class = iris.groupby("class")
for class_value, data in grouped_class:
    heading = class_value.capitalize() + ":"
    print(heading)
    display(data.describe())

# Per-class mean of every numeric column
class_means = grouped_class.mean(numeric_only=True)
print(class_means)

"""Petal Measurements Visualization"""

# Scatter plot of petal_width against petal_length, coloured by class.
# A fixed colour per class is defined here and reused by the later plots.
custom_palette = {"Iris-setosa": "red", "Iris-versicolor": "yellow", "Iris-virginica": "purple"}

# savefig does not create missing directories, so make sure the output
# folder exists before the first figure is written.
os.makedirs("visualization", exist_ok=True)

sns.scatterplot(data=iris, x="petal_width", y="petal_length", hue="class", palette=custom_palette)
plt.title("Petal Measurements by Class")
plt.xlabel("Petal Width")
plt.ylabel("Petal Length")
plt.savefig("visualization/Petal_Measurements_by_Class.png", bbox_inches="tight")
plt.show()

"""Regression Line and Scatter Plot for Petal Measurements"""

# One regression line over all petal observations. lmplot returns a
# FacetGrid; with a single facet its only Axes is the current one, so the
# labels are set on it directly instead of going through pyplot.
petal_point_style = {"alpha": 0.5, "color": "purple"}
petal_line_style = {"color": "purple"}
petal_grid = sns.lmplot(
    data=iris,
    x="petal_width",
    y="petal_length",
    scatter_kws=petal_point_style,
    line_kws=petal_line_style,
)
petal_grid.ax.set_title("Regression Line and Scatter Plot")
petal_grid.ax.set_xlabel("Petal Width")
petal_grid.ax.set_ylabel("Petal Length")
petal_grid.figure.tight_layout()
petal_grid.figure.savefig("visualization/Regression_Line_and_Scatter_Plot.png", bbox_inches="tight")
plt.show()

"""Regression Line with Different Colors for Each Class - Petal Measurements"""

# Separate regression line per class, each drawn in its palette colour
petal_class_options = dict(
    data=iris,
    x="petal_width",
    y="petal_length",
    hue="class",
    palette=custom_palette,
    scatter_kws={"alpha": 0.5},
)
sns.lmplot(**petal_class_options)
petal_class_ax = plt.gca()
petal_class_ax.set_title("Petal Regression and Scatter")
petal_class_ax.set_xlabel("Petal Width")
petal_class_ax.set_ylabel("Petal Length")
plt.tight_layout()
plt.savefig("visualization/Petal_Regression_and_Scatter.png", bbox_inches="tight")
plt.show()

"""Scatter Plot for Sepal Measurements"""

# Plain scatter of sepal_width against sepal_length (no class colouring);
# scatterplot returns the Axes it drew on, which is labelled directly.
sepal_ax = sns.scatterplot(data=iris, x="sepal_width", y="sepal_length", color="purple")
sepal_ax.set_title("Sepal Width vs Sepal Length - Point")
sepal_ax.set_xlabel("Sepal Width")
sepal_ax.set_ylabel("Sepal Length")
sepal_ax.figure.savefig("visualization/Sepal_Width_vs_Sepal_Length_Point.png", bbox_inches="tight")
plt.show()

"""Regression Line for Sepal Measurements"""

# Single regression line over all sepal observations, with yellow
# semi-transparent points
sepal_point_style = {"alpha": 0.5, "color": "yellow"}
sepal_grid = sns.lmplot(data=iris, x="sepal_width", y="sepal_length", scatter_kws=sepal_point_style)
sepal_grid.ax.set_title("Sepal Regression Line and Scatter Plot")
sepal_grid.ax.set_xlabel("Sepal Width")
sepal_grid.ax.set_ylabel("Sepal Length")
sepal_grid.figure.tight_layout()
sepal_grid.figure.savefig("visualization/Sepal_Regression_Line_and_Scatter_Plot.png", bbox_inches="tight")
plt.show()

"""Regression Lines for Sepal Measurements by Class"""

# Per-class regression lines for the sepal measurements. The grid's own
# legend is switched off and an Axes legend is added after the layout pass.
sepal_class_grid = sns.lmplot(
    data=iris,
    x="sepal_width",
    y="sepal_length",
    hue="class",
    palette=custom_palette,
    scatter_kws={"alpha": 0.5},
    legend=False,
)
sepal_class_ax = sepal_class_grid.ax
sepal_class_ax.set_title("Sepal Measurements Variation Among Different Classes")
sepal_class_ax.set_xlabel("Sepal Width")
sepal_class_ax.set_ylabel("Sepal Length")
plt.tight_layout()
sepal_class_ax.legend(loc="best")
plt.savefig("visualization/Sepal_Measurements_Variation_Among_Different_Classes.png", bbox_inches="tight")
plt.show()

"""Pair Plot for Entire Dataset"""

# Pair plot for the whole dataset: a grid with every column plotted against
# every other one, coloured by class.
chart = sns.pairplot(iris, hue="class", palette=custom_palette, plot_kws=dict(alpha=0.5))

# Put the legend in a single row above the grid and drop its title
sns.move_legend(chart, "upper center", bbox_to_anchor=(0.5, 1.01), ncol=3, title="")
plt.suptitle("Point Distribution Overview of Class", y=1.025)

# No plt.xlabel()/plt.ylabel() here: pyplot's label helpers act on the one
# current Axes, not on the grid, so a generic "Feature" label would at best
# be invisible and at worst replace a single panel's variable name. pairplot
# already labels the outer panels with the column names.
plt.tight_layout()
plt.savefig("visualization/Point_Distribution_Overview_of_Class.png", bbox_inches="tight")
plt.show()

"""Correlation Analysis"""

# Correlation matrices for the entire dataset and for each class, drawn as a
# row of annotated heatmaps.
corr = iris.corr(numeric_only=True)

# GroupBy.corr returns the per-class matrices stacked under a
# (class, feature) index, the same layout apply(lambda x: x.corr(...))
# produced, but without passing the grouping column through apply (which
# recent pandas versions warn about).
class_corr = iris.groupby("class").corr(numeric_only=True)

# One panel for the full dataset plus one per class, so the layout follows
# the data instead of assuming exactly three classes. With three classes
# this is the original 1x4 grid at figsize (20, 5).
class_names = iris["class"].unique()
n_panels = len(class_names) + 1
fig, axes = plt.subplots(1, n_panels, figsize=(5 * n_panels, 5), squeeze=False)
axes = axes.ravel()  # squeeze=False always yields a 2-D array; flatten it

# Heatmap for Entire Dataset
sns.heatmap(corr, annot=True, cmap='coolwarm', fmt=".2f", ax=axes[0])
axes[0].set_title("Correlation Matrix - Entire Dataset")

# Heatmaps for Each Class (panel 0 is taken by the full dataset)
for class_ax, class_name in zip(axes[1:], class_names):
    sns.heatmap(class_corr.loc[class_name], annot=True, cmap='coolwarm', fmt=".2f", ax=class_ax)
    class_ax.set_title(f"Correlation Matrix - {class_name.capitalize()}")

plt.tight_layout()
plt.savefig("visualization/Exploring_Correlation_Patterns.png", bbox_inches="tight")
plt.show()

"""Exact Values of Correlations (r-squared)"""

# Square every pairwise correlation coefficient to get the r-squared table
r_squared = corr.pow(2)
print(r_squared)

"""Petal and Sepal Correlation Tests"""

# Pearson correlation test for each width/length pair. After the loop, r and
# p hold the sepal result, exactly as when the two tests were written out.
measurement_pairs = (
    ("Petal", "petal_width", "petal_length"),
    ("Sepal", "sepal_width", "sepal_length"),
)
for part_name, width_col, length_col in measurement_pairs:
    r, p = scs.pearsonr(iris[width_col], iris[length_col])
    print(f"{part_name} correlation: r = {round(r, 3)}, p = {p}")

"""Linear Regression Using Ordinary Least Squares (OLS) for Petals"""

# Ordinary least squares: petal_length explained by petal_width
formula = "petal_length ~ petal_width"
petal_ols = ols(formula, data=iris)
model = petal_ols.fit()

# Print the full regression report for the fitted model
regression_report = model.summary()
print(regression_report)

"""In-Sample Predictions of Randomly Generated Points"""

# Two panels side by side: the data with the predictions on the left, the
# predictions alone on the right.
fig, axes = plt.subplots(1, 2, figsize=(16, 6))

# Draw 30 petal widths uniformly from the observed range and predict their
# petal length with the fitted OLS model. Series.min()/max() are used rather
# than the built-in min()/max(): the built-ins compare element by element and
# give order-dependent results if a NaN is present, while the pandas
# reductions skip missing values.
width_low = iris["petal_width"].min()
width_high = iris["petal_width"].max()
random_points = pd.DataFrame({"petal_width": np.random.uniform(width_low, width_high, 30)})
ypred = model.predict(random_points)

# Left panel: the entire dataset with its regression line, plus the
# predicted points.
# NOTE(review): the legend entry "Synthetic Data" is attached to the iris
# observations, not to the randomly generated points - confirm the wording.
sns.regplot(data=iris, x="petal_width", y="petal_length", label="Synthetic Data", line_kws={"linewidth": 2, "label": "Regplot Line"}, color="yellow", ax=axes[0])
axes[0].scatter(random_points["petal_width"], ypred, label="Predicted Values(OLS)", color="red", marker="s")
axes[0].set_title("Entire Dataset and Predicted Points")
axes[0].set_xlabel("Petal Width")
axes[0].set_ylabel("Petal Length")
axes[0].legend()

# Right panel: only the predicted points
axes[1].scatter(random_points["petal_width"], ypred, label="Predicted Values(OLS)", color="purple", marker="s")
axes[1].set_title("Predicted Points Only")
axes[1].set_xlabel("Petal Width")
axes[1].set_ylabel("Petal Length")
axes[1].legend()

# Adjust layout for a better appearance
plt.tight_layout()

# Save the plot as an image file
plt.savefig("visualization/Predicting_Petal_Length_with_In_Sample_Points_Separate.png", bbox_inches="tight")

# Show the plot
plt.show()

$H_{0a}$