# Rough workspace

In [1]:
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LogisticRegression
from sklearn.datasets import load_iris
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline

# --- Data ---------------------------------------------------------------
# Load the iris bunch, then pull the feature matrix and label vector out of it.
data = load_iris()
X = data.data
y = data.target

# --- Model --------------------------------------------------------------
# Two-step pipeline: a scaler followed by a logistic-regression classifier,
# both constructed with their default settings.
preprocessing_step = StandardScaler()
classification_step = LogisticRegression()
model = make_pipeline(preprocessing_step, classification_step)

# --- Evaluation ---------------------------------------------------------
# Accuracy of the pipeline under 5-fold cross-validation (one score per fold).
cv_options = {"cv": 5, "scoring": "accuracy"}
scores = cross_val_score(model, X, y, **cv_options)

# --- Report -------------------------------------------------------------
# Show the per-fold scores first, then their arithmetic mean.
mean_accuracy = scores.mean()
print("Cross-validation scores:", scores)
print("Mean accuracy:", mean_accuracy)


Cross-validation scores: [0.96666667 1.         0.93333333 0.9        1.        ]
Mean accuracy: 0.9600000000000002


In [8]:
from sklearn.model_selection import StratifiedKFold, cross_val_score
from sklearn.linear_model import LogisticRegression
from sklearn.datasets import load_iris
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline

# Iris features and class labels, unpacked from the loaded bunch.
data = load_iris()
X = data.data
y = data.target

# Scale-then-classify pipeline assembled from default-configured steps.
pipeline_steps = (StandardScaler(), LogisticRegression())
model = make_pipeline(*pipeline_steps)

# Explicit stratified splitter with five folds; every other splitter option
# is left at its default.
N_SPLITS = 5
skf = StratifiedKFold(n_splits=N_SPLITS)

# Accuracy of the pipeline on each fold produced by the splitter.
scores = cross_val_score(estimator=model, X=X, y=y, scoring='accuracy', cv=skf)

# Report the per-fold scores followed by their mean, one line each.
for label, value in (
    ("Stratified cross-validation scores:", scores),
    ("Mean accuracy:", scores.mean()),
):
    print(label, value)


Stratified cross-validation scores: [0.96666667 1.         0.93333333 0.9        1.        ]
Mean accuracy: 0.9600000000000002


In [15]:
from sklearn.model_selection import StratifiedKFold
from sklearn.linear_model import LogisticRegression
from sklearn.datasets import load_iris
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

# Manual stratified 5-fold evaluation of a scaling + logistic-regression
# pipeline on iris. Metrics are collected fold by fold and then averaged.

# Load dataset
data = load_iris()
X, y = data.data, data.target

# Create a logistic regression model pipeline
model = make_pipeline(StandardScaler(), LogisticRegression())

# Define stratified k-fold cross-validation
skf = StratifiedKFold(n_splits=5)

# Per-fold metric stores: entry i of every list belongs to fold i
accuracies = []
precisions = []
recalls = []
f1_scores = []
confusion_matrices = []

# Perform stratified cross-validation
for train_index, test_index in skf.split(X, y):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    # Refit the whole pipeline on this fold's training rows only, so the
    # scaler never sees the rows it is about to be evaluated on
    model.fit(X_train, y_train)

    # Predict on the held-out rows of this fold
    y_pred = model.predict(X_test)

    # Calculate metrics (precision/recall/F1 use the 'weighted' average)
    accuracies.append(accuracy_score(y_test, y_pred))
    precisions.append(precision_score(y_test, y_pred, average='weighted'))
    recalls.append(recall_score(y_test, y_pred, average='weighted'))
    f1_scores.append(f1_score(y_test, y_pred, average='weighted'))

    # Keep this fold's confusion matrix as well. The list was previously
    # created but never filled, so it was always empty after the loop.
    confusion_matrices.append(confusion_matrix(y_test, y_pred))

# Output the average metrics (plain arithmetic mean over the folds)
print("Mean Accuracy:", sum(accuracies) / len(accuracies))
print("Mean Precision:", sum(precisions) / len(precisions))
print("Mean Recall:", sum(recalls) / len(recalls))
print("Mean F1-Score:", sum(f1_scores) / len(f1_scores))



# Marker line separating the fold-averaged metrics above from the output of
# the code that follows in this cell
print("printing before the cross validation")




from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.datasets import load_iris
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

# Iris feature matrix and labels.
data = load_iris()
X = data.data
y = data.target

# Single hold-out split: 20% of the rows form the test set, the split is
# stratified on the labels, and a fixed seed makes the partition repeatable.
split_options = dict(test_size=0.2, stratify=y, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

# Scaling + logistic-regression pipeline, fitted on the training rows only.
model = make_pipeline(StandardScaler(), LogisticRegression())
model.fit(X_train, y_train)

# Class predictions for the held-out rows.
y_pred = model.predict(X_test)

# Hold-out metrics; precision, recall and F1 all use the 'weighted' average.
weighted = {"average": "weighted"}
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, **weighted)
recall = recall_score(y_test, y_pred, **weighted)
f1 = f1_score(y_test, y_pred, **weighted)

# Print each metric on its own line, in a fixed order.
for label, value in (
    ("Accuracy:", accuracy),
    ("Precision:", precision),
    ("Recall:", recall),
    ("F1-Score:", f1),
):
    print(label, value)


Mean Accuracy: 0.9600000000000002
Mean Precision: 0.9632996632996633
Mean Recall: 0.9600000000000002
Mean F1-Score: 0.9597984861142755
printing before the cross validation
Accuracy: 0.9333333333333333
Precision: 0.9333333333333333
Recall: 0.9333333333333333
F1-Score: 0.9333333333333333


In [16]:
from sklearn.model_selection import StratifiedKFold, cross_val_predict
from sklearn.linear_model import LogisticRegression
from sklearn.datasets import load_iris
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

# Pooled cross-validated evaluation: every iris sample gets one out-of-fold
# prediction, and the metrics are computed once over all of them.

# Load dataset
data = load_iris()
X, y = data.data, data.target

# Create a logistic regression model pipeline
model = make_pipeline(StandardScaler(), LogisticRegression())

# Define stratified k-fold cross-validation
skf = StratifiedKFold(n_splits=5)

# Generate cross-validated predictions (one prediction per sample, made by
# the model that did not see that sample during fitting)
y_pred = cross_val_predict(model, X, y, cv=skf)

# Calculate metrics over the pooled predictions
accuracy = accuracy_score(y, y_pred)
precision = precision_score(y, y_pred, average='weighted')
recall = recall_score(y, y_pred, average='weighted')
f1 = f1_score(y, y_pred, average='weighted')
confusion_mat = confusion_matrix(y, y_pred)

# Output the metrics
print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1-Score:", f1)
# sep="\n" starts the matrix on its own line. Embedding "\n" in the label
# instead made print() insert its separator space before the first row,
# shifting that row one column to the right of the others.
print("Confusion Matrix:", confusion_mat, sep="\n")


Accuracy: 0.96
Precision: 0.96
Recall: 0.96
F1-Score: 0.96
Confusion Matrix:
 [[50  0  0]
 [ 0 47  3]
 [ 0  3 47]]


In [17]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

# Toy binary example comparing macro and weighted averaging of
# precision / recall / F1 on hand-written label lists.

# True labels
y_true = [1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0]

# Predicted labels
y_pred = [1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1]

# Calculate the confusion matrix
confusion_mat = confusion_matrix(y_true, y_pred)
# sep="\n" starts the matrix on its own line. Embedding "\n" in the label
# instead made print() insert its separator space before the first row,
# shifting that row one column to the right of the others.
print("Confusion Matrix:", confusion_mat, sep="\n")

# Calculate accuracy
accuracy = accuracy_score(y_true, y_pred)
print("Accuracy:", accuracy)

# Calculate precision, recall, and F1-score with macro and weighted averages
precision_macro = precision_score(y_true, y_pred, average='macro')
recall_macro = recall_score(y_true, y_pred, average='macro')
f1_macro = f1_score(y_true, y_pred, average='macro')

precision_weighted = precision_score(y_true, y_pred, average='weighted')
recall_weighted = recall_score(y_true, y_pred, average='weighted')
f1_weighted = f1_score(y_true, y_pred, average='weighted')

# The "\n" prefix prints a blank line before each section heading
print("\nMacro Averages:")
print("Precision (Macro):", precision_macro)
print("Recall (Macro):", recall_macro)
print("F1-Score (Macro):", f1_macro)

print("\nWeighted Averages:")
print("Precision (Weighted):", precision_weighted)
print("Recall (Weighted):", recall_weighted)
print("F1-Score (Weighted):", f1_weighted)


Confusion Matrix:
 [[7 2]
 [2 9]]
Accuracy: 0.8

Macro Averages:
Precision (Macro): 0.797979797979798
Recall (Macro): 0.797979797979798
F1-Score (Macro): 0.797979797979798

Weighted Averages:
Precision (Weighted): 0.8
Recall (Weighted): 0.8
F1-Score (Weighted): 0.8


In [None]:
# NOTE(review): stray keyboard input left in an unexecuted cell. As code it was
# a bare undefined name that raises NameError on "Run All", so it is commented
# out here; delete the cell if the text has no meaning.
# sfsfsdfdzfsfsdfsafsdgsfgsgsdzfsdfsghtrthrfgsgsgdsgdsgdsasdfsdfsagasafdsadfsfsddfszvfregesasgfsfegseasfaeg