# **Predictive Modeling Possibilities**

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# Baseline workflow: load a CSV, label-encode one feature, train a random
# forest on an 80/20 split, print test metrics, and predict one new sample.

# Load your dataset, replace 'your_data.csv' with your data file.
data = pd.read_csv('your_data.csv')

# Assuming your dataset has features like 'feature1', 'feature2', etc., and 'GRADE' as the target variable.
# Replace these with your actual feature and target column names.

# Preprocess the data
# .copy() gives X its own storage, so the column assignment below modifies X
# itself rather than a possible slice of `data` (avoids SettingWithCopyWarning).
X = data[['feature1', 'feature2']].copy()  # Features
y = data['GRADE']  # Target variable

# Encode categorical variables (if needed)
# NOTE(review): LabelEncoder is documented for target labels; OrdinalEncoder or
# OneHotEncoder is the usual choice for input features -- consider switching.
le = LabelEncoder()
X['feature1'] = le.fit_transform(X['feature1'])

# Split the data into training and testing sets (80% train / 20% test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Choose a classification model (Random Forest in this example)
model = RandomForestClassifier(n_estimators=100, random_state=42)

# Train the model
model.fit(X_train, y_train)

# Make predictions on the test set
y_pred = model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy:.2f}')

# Print classification report for more detailed evaluation
print(classification_report(y_test, y_pred))

# Use the trained model to make predictions on new data
# value1 / value2 are placeholders: define them with the raw (un-encoded)
# values of the sample you want to score before running this part.
new_data = pd.DataFrame({'feature1': [value1], 'feature2': [value2]})  # Replace with your new data
# Apply the same encoding that was fitted on the training data; the model was
# trained on encoded 'feature1' values, not the raw ones. An unseen category
# raises ValueError here instead of producing a meaningless prediction.
new_data['feature1'] = le.transform(new_data['feature1'])
predicted_grade = model.predict(new_data)
print(f'Predicted Grade: {predicted_grade[0]}')

In [None]:
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# Extended workflow: load a CSV, fill missing values, label-encode every
# object-typed column, scale features, tune a random forest with a 5-fold grid
# search, print test metrics, and predict one new sample.

# Load your dataset, replace 'your_data.csv' with your data file.
data = pd.read_csv('your_data.csv')

# Assuming your dataset has various features and 'GRADE' as the target variable.
# Replace these with your actual feature and target column names.

# Preprocess the data
X = data.drop(columns=['GRADE'])  # Features
y = data['GRADE']  # Target variable

# Identify categorical columns first so missing values can be filled per type.
categorical_columns = X.select_dtypes(include=['object']).columns

# Handle missing data (you may need more advanced methods)
# Object columns get a string placeholder: filling them with the integer 0
# would mix str and int values, which LabelEncoder cannot sort/encode.
for col in categorical_columns:
    X[col] = X[col].fillna('missing')
X = X.fillna(0)

# Encode categorical variables (if needed)
# One fitted encoder is kept per column so the exact same mapping can be
# re-applied to new data later; `le` alone only holds the last column's encoder.
encoders = {}
for col in categorical_columns:
    le = LabelEncoder()
    X[col] = le.fit_transform(X[col])
    encoders[col] = le

# Split the data into training and testing sets (80% train / 20% test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Feature scaling (normalize numerical features)
# The scaler is fitted on the training split only and then applied to the test split.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Choose a classification model (Random Forest in this example)
model = RandomForestClassifier(random_state=42)

# Hyperparameter tuning using GridSearchCV (you may need more parameters)
param_grid = {
    'n_estimators': [100, 200],
    'max_depth': [None, 10, 20],
    'min_samples_split': [2, 5],
    'min_samples_leaf': [1, 2]
}

grid_search = GridSearchCV(model, param_grid, cv=5, n_jobs=-1)
grid_search.fit(X_train, y_train)

# Get the best model from hyperparameter tuning
# With GridSearchCV's default refit=True, best_estimator_ has already been
# refitted on the whole training set, so no additional fit() call is needed.
best_model = grid_search.best_estimator_

# Make predictions on the test set
y_pred = best_model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy:.2f}')

# Print classification report for more detailed evaluation
print(classification_report(y_test, y_pred))

# Use the trained model to make predictions on new data
# value1 / value2 are placeholders, and the keys below must be replaced with
# the same feature column names that the training data uses.
new_data = pd.DataFrame({
    'Feature1': [value1],
    'Feature2': [value2],
    # Add more features as needed
})

# Preprocess new data similarly to training data (handle missing values, encoding, scaling)
# Select the training columns in training order; a missing column raises
# KeyError here rather than a harder-to-read error inside the scaler.
new_data = new_data[X.columns].copy()
for col in categorical_columns:
    # Reuse the encoder fitted for this specific column; unseen categories
    # raise ValueError.
    new_data[col] = encoders[col].transform(new_data[col].fillna('missing'))
new_data = new_data.fillna(0)
new_data = scaler.transform(new_data)

predicted_grade = best_model.predict(new_data)
print(f'Predicted Grade: {predicted_grade[0]}')