In [8]:
# Import necessary libraries
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report

# Load the Iris dataset: 150 samples, 4 numeric features, 3 classes
data = load_iris()
X = data.data
y = data.target

# Split FIRST, so that no statistic of the held-out rows can influence the
# preprocessing steps fitted below. Fitting the scaler/PCA on all rows and
# splitting afterwards leaks test-set information into the features, so the
# metrics here may differ slightly from such a run.
# The split depends only on the number of rows and random_state, so the
# train/test membership is the same as when splitting the projected data.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Standardize using the mean/variance of the training rows only
scaler = StandardScaler()
X_train_standardized = scaler.fit_transform(X_train_raw)
X_test_standardized = scaler.transform(X_test_raw)

# Perform PCA for dimensionality reduction; components are learned on the
# training rows and merely applied to the test rows
n_components = 2
pca = PCA(n_components=n_components)
X_train = pca.fit_transform(X_train_standardized)
X_test = pca.transform(X_test_standardized)

# Full-dataset projections, kept under their original names for any later
# exploration (e.g. plotting). They are NOT used for training or evaluation.
X_standardized = scaler.transform(X)
X_pca = pca.transform(X_standardized)

# Train an SVM classifier on the 2-D PCA features
svm_classifier = SVC(kernel='linear', C=1.0, random_state=42)
svm_classifier.fit(X_train, y_train)

# Make predictions on the test set
y_pred = svm_classifier.predict(X_test)

# Evaluate the classifier
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred, target_names=data.target_names)

print(f"Accuracy: {accuracy:.2f}")
print("Classification Report:\n", report)


Accuracy: 0.90
Classification Report:
               precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        10
  versicolor       0.88      0.78      0.82         9
   virginica       0.83      0.91      0.87        11

    accuracy                           0.90        30
   macro avg       0.90      0.90      0.90        30
weighted avg       0.90      0.90      0.90        30



In [7]:
# Show the dataset's bundled description text (features, classes, summary stats)
iris_description = data.DESCR
print(iris_description)


.. _iris_dataset:

Iris plants dataset
--------------------

**Data Set Characteristics:**

    :Number of Instances: 150 (50 in each of three classes)
    :Number of Attributes: 4 numeric, predictive attributes and the class
    :Attribute Information:
        - sepal length in cm
        - sepal width in cm
        - petal length in cm
        - petal width in cm
        - class:
                - Iris-Setosa
                - Iris-Versicolour
                - Iris-Virginica
                
    :Summary Statistics:

                    Min  Max   Mean    SD   Class Correlation
    sepal length:   4.3  7.9   5.84   0.83    0.7826
    sepal width:    2.0  4.4   3.05   0.43   -0.4194
    petal length:   1.0  6.9   3.76   1.76    0.9490  (high!)
    petal width:    0.1  2.5   1.20   0.76    0.9565  (high!)

    :Missing Attribute Values: None
    :Class Distribution: 33.3% for each of 3 classes.
    :Creator: R.A. Fisher
    :Donor: Michael Marshall (MARSHALL%PLU@io.arc.nasa.gov)
    :

In [9]:
import numpy as np
from scipy.stats import f_oneway

# Three small samples, one NumPy array per group
group1, group2, group3 = (
    np.array(values) for values in ([1, 2, 5], [2, 4, 2], [2, 3, 4])
)
samples = (group1, group2, group3)

# One-way ANOVA across the three groups
f_statistic, p_value = f_oneway(*samples)

# Report the test statistic and its p-value
print(f"F-statistic: {f_statistic:.2f}")
print(f"P-value: {p_value:.4f}")

# Decision rule: reject H0 only when the p-value falls below alpha
alpha = 0.05
verdict = (
    "Reject the null hypothesis - There is a significant difference between groups."
    if p_value < alpha
    else "Fail to reject the null hypothesis - There is no significant difference between groups."
)
print(verdict)


F-statistic: 0.05
P-value: 0.9516
Fail to reject the null hypothesis - There is no significant difference between groups.


In [10]:
from scipy.stats import f, f_oneway

# Define the observations for each group
group1 = [1, 2, 5]
group2 = [2, 4, 2]
group3 = [2, 3, 4]
groups = [group1, group2, group3]

# Derive the degrees of freedom from the data instead of hard-coding them,
# so they stay correct if a group is added or its size changes.
dfn = len(groups) - 1  # numerator: number of groups - 1
dfd = sum(len(group) for group in groups) - len(groups)  # denominator: observations - groups

# Set the significance level (alpha)
alpha = 0.05

# Critical value: the (1 - alpha) quantile of the F(dfn, dfd) distribution
critical_value = f.ppf(1 - alpha, dfn, dfd)

# Perform one-way ANOVA
f_statistic, p_value = f_oneway(*groups)

# Print the results
print(f"F-statistic: {f_statistic:.2f}")
print(f"P-value: {p_value:.4f}")
print(f"Critical Value: {critical_value:.2f}")

# Compare F-statistic to the critical value
if f_statistic > critical_value:
    print("Reject the null hypothesis - There is a significant difference between groups.")
else:
    print("Fail to reject the null hypothesis - There is no significant difference between groups.")


F-statistic: 0.05
P-value: 0.9516
Critical Value: 5.14
Fail to reject the null hypothesis - There is no significant difference between groups.


In [11]:
import numpy as np
from scipy.stats import f, f_oneway

# Define the data for each group
group1 = np.array([1, 2, 5])
group2 = np.array([2, 4, 2])
group3 = np.array([2, 3, 4])
groups = [group1, group2, group3]
all_values = np.concatenate(groups)

# Perform one-way ANOVA
f_statistic, p_value = f_oneway(*groups)

# Calculate the means for each group
mean_group1 = np.mean(group1)
mean_group2 = np.mean(group2)
mean_group3 = np.mean(group3)

# Calculate the Grand Mean (mean over all pooled observations)
grand_mean = np.mean(all_values)

# Calculate SSBetween (Between-Group Sum of Squares):
# each group's squared mean deviation, weighted by its size
n_groups = len(groups)
n_total = all_values.size
SSBetween = sum(len(group) * (np.mean(group) - grand_mean) ** 2 for group in groups)

# Calculate SSTotal (Total Sum of Squares)
SSTotal = np.sum((all_values - grand_mean) ** 2)

# Calculate SSWithin (Within-Group Sum of Squares) via SSTotal = SSBetween + SSWithin
SSWithin = SSTotal - SSBetween

# Calculate MSBetween (Mean Squares Between)
df_between = n_groups - 1
df_within = n_total - n_groups
MSBetween = SSBetween / df_between

# Calculate MSWithin (Mean Squares Within)
MSWithin = SSWithin / df_within

# Print the results
print(f"F-statistic: {f_statistic:.2f}")
print(f"P-value: {p_value:.4f}")
print(f"SSBetween: {SSBetween:.2f}")
print(f"SSTotal: {SSTotal:.2f}")
print(f"SSWithin: {SSWithin:.2f}")
print(f"MSBetween: {MSBetween:.2f}")
print(f"MSWithin: {MSWithin:.2f}")

# Compute the critical value in this cell from this cell's own alpha and
# degrees of freedom, rather than relying on a `critical_value` left in the
# kernel by another cell (which fails on a fresh kernel if run alone).
alpha = 0.05
critical_value = f.ppf(1 - alpha, df_between, df_within)

# Compare F-statistic to the critical value
if f_statistic > critical_value:
    print("Reject the null hypothesis - There is a significant difference between groups.")
else:
    print("Fail to reject the null hypothesis - There is no significant difference between groups.")


F-statistic: 0.05
P-value: 0.9516
SSBetween: 0.22
SSTotal: 13.56
SSWithin: 13.33
MSBetween: 0.11
MSWithin: 2.22
Fail to reject the null hypothesis - There is no significant difference between groups.


In [12]:
import numpy as np
from scipy.stats import f, f_oneway

# Define the data for each group
group1 = np.array([1, 2, 5])
group2 = np.array([2, 4, 2])
group3 = np.array([2, 3, 4])
groups = [group1, group2, group3]
all_values = np.concatenate(groups)

# Calculate the grand mean over ALL observations. Averaging the group means
# is only equivalent when every group has the same size; pooling the
# observations is correct for unbalanced groups as well.
grand_mean = all_values.mean()

# Calculate SSBetween: size-weighted squared deviation of each group mean
SSBetween = sum(len(group) * (group.mean() - grand_mean) ** 2 for group in groups)

# Calculate SSTotal: squared deviation of every observation from the grand mean
SSTotal = ((all_values - grand_mean) ** 2).sum()

# Calculate SSWithin: squared deviation of every observation from its own group mean
SSWithin = sum(((group - group.mean()) ** 2).sum() for group in groups)

# Calculate degrees of freedom from the data rather than hard-coding them
df_between = len(groups) - 1  # number of groups - 1
df_total = all_values.size - 1  # total number of observations - 1
df_within = df_total - df_between  # observations - groups

# Calculate MSBetween and MSWithin
MSBetween = SSBetween / df_between
MSWithin = SSWithin / df_within

# Calculate F-statistic
F_statistic = MSBetween / MSWithin

# Set the significance level (alpha)
alpha = 0.05

# Calculate the critical value for the F-statistic
critical_value = f.ppf(1 - alpha, df_between, df_within)

# Print the results
print(f"SSBetween: {SSBetween:.2f}")
print(f"SSTotal: {SSTotal:.2f}")
print(f"SSWithin: {SSWithin:.2f}")
print(f"MSBetween: {MSBetween:.2f}")
print(f"MSWithin: {MSWithin:.2f}")
print(f"F-statistic: {F_statistic:.2f}")
print(f"Critical Value: {critical_value:.2f}")

# Compare F-statistic to the critical value
if F_statistic > critical_value:
    print("Reject the null hypothesis - There is a significant difference between groups.")
else:
    print("Fail to reject the null hypothesis - There is no significant difference between groups.")


SSBetween: 0.22
SSTotal: 13.56
SSWithin: 13.33
MSBetween: 0.11
MSWithin: 2.22
F-statistic: 0.05
Critical Value: 5.14
Fail to reject the null hypothesis - There is no significant difference between groups.
