In [13]:
#CCPP
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, mean_squared_error
from sklearn.preprocessing import StandardScaler, KBinsDiscretizer
import matplotlib.pyplot as plt
import numpy as np

# Load the Combined Cycle Power Plant dataset
data = pd.read_csv('ccpp.csv')

# Display the first few rows of the dataset
print(data.head())

# Assign column names based on dataset description (if not already present)
# Assuming the dataset has columns: ['AT', 'V', 'AP', 'RH', 'PE'] where PE is the target
data.columns = ['AT', 'V', 'AP', 'RH', 'PE']

# Split features (X) and target (y)
X = data[['AT', 'V', 'AP', 'RH']]
y = data['PE']

# Normalize the feature data
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Convert the continuous target variable into discrete categories
# For example, create 3 bins: 'Low', 'Medium', 'High'
binner = KBinsDiscretizer(n_bins=3, encode='ordinal', strategy='uniform')
y = binner.fit_transform(y.values.reshape(-1, 1)).ravel()

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=0)

# Initialize the Logistic Regression model
model = LogisticRegression(max_iter=1000)

# Train the model
model.fit(X_train, y_train)

# Predict on the test set
y_pred = model.predict(X_test)

# Calculate accuracy, MSE, and misclassification rate
accuracy = accuracy_score(y_test, y_pred)

misclassification_rate = 1 - accuracy

print(f"Accuracy: {accuracy * 100:.2f}%")

print(f"Misclassification Rate: {misclassification_rate * 100:.2f}%")




      AT      V       AP     RH      PE
0   8.34  40.77  1010.84  90.01  480.48
1  23.64  58.49  1011.40  74.20  445.75
2  29.74  56.90  1007.15  41.91  438.76
3  19.07  49.69  1007.22  76.79  453.09
4  11.80  40.66  1017.13  97.20  464.43
Accuracy: 87.85%
Misclassification Rate: 12.15%


In [25]:
#titanic dataset
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, mean_squared_error
from sklearn.preprocessing import StandardScaler, LabelEncoder
import matplotlib.pyplot as plt
import numpy as np

# Load the Titanic dataset
data = pd.read_csv('titanic.csv')

# Display the first few rows of the dataset
print(data.head())

# Preprocess the data
# For simplicity, let's assume we're using 'Pclass', 'Sex', 'Age', and 'Fare' as features and 'Survived' as the target

# Select features and target
X = data[['Pclass', 'Sex', 'Age', 'Fare']]
y = data['Survived']

# Handle missing values (e.g., fill missing Age with the median)
X['Age'].fillna(X['Age'].median(), inplace=True)

# Convert categorical 'Sex' feature to numeric
X['Sex'] = LabelEncoder().fit_transform(X['Sex'])

# Normalize the feature data
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

# Initialize the Logistic Regression model
model = LogisticRegression(max_iter=1000)

# Train the model
model.fit(X_train, y_train)

# Predict on the test set
y_pred = model.predict(X_test)

# Calculate accuracy, MSE, and misclassification rate
accuracy = accuracy_score(y_test, y_pred)

misclassification_rate = 1 - accuracy

print(f"Accuracy: {accuracy * 100:.2f}%")

print(f"Misclassification Rate: {misclassification_rate * 100:.2f}%")



   Unnamed: 0  PassengerId  Survived  Pclass  \
0           0            1         0       3   
1           1            2         1       1   
2           2            3         1       3   
3           3            4         1       1   
4           4            5         0       3   

                                                Name     Sex   Age  SibSp  \
0                            Braund, Mr. Owen Harris    male  22.0      1   
1  Cumings, Mrs. John Bradley (Florence Briggs Th...  female  38.0      1   
2                             Heikkinen, Miss. Laina  female  26.0      0   
3       Futrelle, Mrs. Jacques Heath (Lily May Peel)  female  35.0      1   
4                           Allen, Mr. William Henry    male  35.0      0   

   Parch            Ticket     Fare Cabin Embarked  
0      0         A/5 21171   7.2500   NaN        S  
1      0          PC 17599  71.2833   C85        C  
2      0  STON/O2. 3101282   7.9250   NaN        S  
3      0            113803  53.1000 

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  X['Age'].fillna(X['Age'].median(), inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X['Age'].fillna(X['Age'].median(), inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X['Sex'] = LabelEncoder().fit_transform(X['Se

In [22]:
#iris dataset
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, mean_squared_error
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import numpy as np

# Load the Iris dataset
iris = load_iris()
data = pd.DataFrame(data=iris.data, columns=iris.feature_names)
data['species'] = iris.target

# Display the first few rows of the dataset
print(data.head())

# Select features and target
X = data[iris.feature_names]
y = data['species']

# Normalize the feature data
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=42)

# Initialize the Logistic Regression model
model = LogisticRegression(max_iter=1000)

# Train the model
model.fit(X_train, y_train)

# Predict on the test set
y_pred = model.predict(X_test)

# Calculate accuracy, MSE, and misclassification rate
accuracy = accuracy_score(y_test, y_pred)

misclassification_rate = 1 - accuracy

print(f"Accuracy: {accuracy * 100:.2f}%")

print(f"Misclassification Rate: {misclassification_rate * 100:.2f}%")




   sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)  \
0                5.1               3.5                1.4               0.2   
1                4.9               3.0                1.4               0.2   
2                4.7               3.2                1.3               0.2   
3                4.6               3.1                1.5               0.2   
4                5.0               3.6                1.4               0.2   

   species  
0        0  
1        0  
2        0  
3        0  
4        0  
Accuracy: 98.33%
Misclassification Rate: 1.67%


In [31]:
#wine dataset
import pandas as pd
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, mean_squared_error
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import numpy as np

# Load the Wine dataset
wine = load_wine()
data = pd.DataFrame(data=wine.data, columns=wine.feature_names)
data['target'] = wine.target

# Display the first few rows of the dataset
print(data.head())

# Select features and target
X = data[wine.feature_names]
y = data['target']

# Normalize the feature data
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=0)

# Initialize the Logistic Regression model
model = LogisticRegression(max_iter=1000, multi_class='ovr')

# Train the model
model.fit(X_train, y_train)

# Predict on the test set
y_pred = model.predict(X_test)

# Calculate accuracy, MSE, and misclassification rate
accuracy = accuracy_score(y_test, y_pred)

misclassification_rate = 1 - accuracy

print(f"Accuracy: {accuracy * 100:.2f}%")

print(f"Misclassification Rate: {misclassification_rate * 100:.2f}%")




   alcohol  malic_acid   ash  alcalinity_of_ash  magnesium  total_phenols  \
0    14.23        1.71  2.43               15.6      127.0           2.80   
1    13.20        1.78  2.14               11.2      100.0           2.65   
2    13.16        2.36  2.67               18.6      101.0           2.80   
3    14.37        1.95  2.50               16.8      113.0           3.85   
4    13.24        2.59  2.87               21.0      118.0           2.80   

   flavanoids  nonflavanoid_phenols  proanthocyanins  color_intensity   hue  \
0        3.06                  0.28             2.29             5.64  1.04   
1        2.76                  0.26             1.28             4.38  1.05   
2        3.24                  0.30             2.81             5.68  1.03   
3        3.49                  0.24             2.18             7.80  0.86   
4        2.69                  0.39             1.82             4.32  1.04   

   od280/od315_of_diluted_wines  proline  target  
0          

