In [6]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix

train_url = 'https://raw.githubusercontent.com/dsrscientist/dataset5/main/termdeposit_train.csv'
test_url = 'https://raw.githubusercontent.com/dsrscientist/dataset5/main/termdeposit_test.csv'

train_data = pd.read_csv(train_url)
test_data = pd.read_csv(test_url)

data = pd.concat([train_data, test_data], axis=0)

# Convert categorical variables to numerical using one-hot encoding
data = pd.get_dummies(data, drop_first=True)

# Split the data into features (X) and target variable (y)
X = data.drop('subscribed_yes', axis=1)
y = data['subscribed_yes']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train the Random Forest Classifier model
model = RandomForestClassifier()
model.fit(X_train, y_train)

# Make predictions on the test set
y_pred = model.predict(X_test)

# Evaluate the model's performance
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))


[[8203  103]
 [ 610  127]]
              precision    recall  f1-score   support

           0       0.93      0.99      0.96      8306
           1       0.55      0.17      0.26       737

    accuracy                           0.92      9043
   macro avg       0.74      0.58      0.61      9043
weighted avg       0.90      0.92      0.90      9043



In [16]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import OneHotEncoder

url = 'https://raw.githubusercontent.com/dsrscientist/dataset3/main/glass.csv'
column_names = ['Feature 1', 'Feature 2', 'Feature 3', 'Feature 4', 'Feature 5', 'Feature 6', 'Feature 7', 'Feature 8', 'Feature 9', 'Feature 10', 'Type']
glass_data = pd.read_csv(url, names=column_names)

# Perform one-hot encoding for categorical variable(s)
glass_data_encoded = pd.get_dummies(glass_data, columns=['Type'])

X = glass_data_encoded.drop('Feature 1', axis=1)  # Adjust the target column as needed
y = glass_data_encoded['Feature 1']  # Adjust the target column as needed

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = LinearRegression()
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print("Mean Squared Error:", mse)
print("R2 Score:", r2)


Mean Squared Error: 265.12991054723733
R2 Score: 0.9317367434333286


In [27]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import LabelEncoder

# Load the dataset
url = "https://raw.githubusercontent.com/dsrscientist/dataset4/main/Grades.csv"
grades_data = pd.read_csv(url)

# Drop unnecessary columns
grades_data = grades_data.drop(['Seat No.'], axis=1)

# Convert non-numeric grades to numeric values
label_encoder = LabelEncoder()
grades_data = grades_data.apply(lambda x: label_encoder.fit_transform(x.astype(str)))

# Separate features and target variable
X = grades_data.drop('CGPA', axis=1)
y = grades_data['CGPA']

# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train the linear regression model
model = LinearRegression()
model.fit(X_train, y_train)

# Predict the target variable for the test set
y_pred = model.predict(X_test)

# Evaluate the model
mse = np.mean((y_pred - y_test) ** 2)
print("Mean Squared Error:", mse)


Mean Squared Error: 1449.6072487740648
