In [54]:
from pymongo import MongoClient
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
import tensorflow as tf
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense, Input

In [None]:
# Connect to MongoDB
mongo = MongoClient(port=27017)

# List all databases and highlight the specific one
for dbname in mongo.list_database_names():
    if dbname == "texasSchoolsDB":
        print(f'{dbname} <----------')
    else:
        print(dbname)

In [56]:

# Set the database and collection
db = mongo["texasSchoolsDB"]
collection = db["NN_model"]

In [57]:
# Convert MongoDB collection to DataFrame
data = list(collection.find({}, {'_id': 0}))
NN_modeldf = pd.DataFrame(data)

NN_modeldf.head()

Unnamed: 0,DISTRICT NAME,TOTAL STUDENTS,DISTRICT ACCOUNTABILITY RATINGS,4-YR LONGITUDINAL GRADUATION RATE DISTRICT EXCL,STAAR: % ELA/READING AT APPROACHES GRADE LEVEL STANDARD OR ABOVE,STAAR: % ELA/READING AT MEETS GRADE LEVEL STANDARD OR ABOVE,STAAR: % ELA/READING AT MASTERS GRADE LEVEL STANDARD,STAAR: % MATHEMATICS AT APPROACHES GRADE LEVEL STANDARD OR ABOVE,STAAR: % MATHEMATICS AT MEETS GRADE LEVEL STANDARD OR ABOVE,STAAR: % MATHEMATICS AT MASTERS GRADE LEVEL STANDARD,COLLEGE ADMISSIONS: % TESTED,COLLEGE ADMISSIONS: % AT/ABOVE CRITERION
0,CAYUGA ISD,574,A,100.0,81.0,67.0,39.0,88.0,65.0,34.0,58.3,19.0
1,ELKHART ISD,1150,A,100.0,85.0,64.0,34.0,84.0,49.0,23.0,51.6,27.7
2,FRANKSTON ISD,808,A,95.2,84.0,63.0,24.0,85.0,57.0,26.0,92.7,36.8
3,NECHES ISD,342,A,95.8,87.0,67.0,30.0,94.0,69.0,27.0,87.0,15.0
4,PALESTINE ISD,3360,B,99.0,72.0,48.0,20.0,75.0,44.0,20.0,43.3,49.4


In [58]:

NN_modeldf.columns = NN_modeldf.columns.str.replace('[^A-Za-z0-9]+', ' ', regex=True).str.strip().str.replace(' ', '_')


In [59]:
NN_modeldf = NN_modeldf.dropna()

In [60]:
NN_modeldf.columns

Index(['DISTRICT_NAME', 'TOTAL_STUDENTS', 'DISTRICT_ACCOUNTABILITY_RATINGS',
       '4_YR_LONGITUDINAL_GRADUATION_RATE_DISTRICT_EXCL',
       'STAAR_ELA_READING_AT_APPROACHES_GRADE_LEVEL_STANDARD_OR_ABOVE',
       'STAAR_ELA_READING_AT_MEETS_GRADE_LEVEL_STANDARD_OR_ABOVE',
       'STAAR_ELA_READING_AT_MASTERS_GRADE_LEVEL_STANDARD',
       'STAAR_MATHEMATICS_AT_APPROACHES_GRADE_LEVEL_STANDARD_OR_ABOVE',
       'STAAR_MATHEMATICS_AT_MEETS_GRADE_LEVEL_STANDARD_OR_ABOVE',
       'STAAR_MATHEMATICS_AT_MASTERS_GRADE_LEVEL_STANDARD',
       'COLLEGE_ADMISSIONS_TESTED', 'COLLEGE_ADMISSIONS_AT_ABOVE_CRITERION'],
      dtype='object')

In [63]:
NN_modeldf = pd.get_dummies(NN_modeldf, columns=['DISTRICT_NAME'])

In [64]:

# Prepare features and target
X = NN_modeldf.drop(columns=['DISTRICT_ACCOUNTABILITY_RATINGS'])
y = NN_modeldf['DISTRICT_ACCOUNTABILITY_RATINGS']

In [67]:

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# Initialize and fit the LabelEncoder and StandardScaler
label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(y_train)
y_test_encoded = label_encoder.transform(y_test)

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Define the model using the Input layer
model = Sequential([
    Input(shape=(1051,)), 
    Dense(80, activation="relu"),
    Dense(100, activation='relu'),
    Dense(50, activation='relu'),
    Dense(10, activation='relu'),
    Dense(1, activation='sigmoid')
])

# Compile the model
model.compile(optimizer='adam', loss='mse', metrics=['accuracy'])




In [68]:
print(X_train_scaled.shape)  # Check the shape of the training data


(840, 1051)


In [69]:
# Now the model is ready to be trained
model.fit(X_train_scaled, y_train_encoded, epochs=10, batch_size=32, validation_split=0.2)

Epoch 1/10
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.5301 - loss: 0.7473 - val_accuracy: 0.5060 - val_loss: 0.7218
Epoch 2/10
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.5757 - loss: 0.7878 - val_accuracy: 0.5595 - val_loss: 0.6454
Epoch 3/10
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7335 - loss: 0.6823 - val_accuracy: 0.6310 - val_loss: 0.5948
Epoch 4/10
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8352 - loss: 0.6787 - val_accuracy: 0.6250 - val_loss: 0.5706
Epoch 5/10
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8783 - loss: 0.5192 - val_accuracy: 0.6310 - val_loss: 0.5760
Epoch 6/10
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.8798 - loss: 0.5179 - val_accuracy: 0.6310 - val_loss: 0.5750
Epoch 7/10
[1m21/21[0m [32m━━━━━━━━━━

<keras.src.callbacks.history.History at 0x306692a10>

In [70]:
# Evaluate the model
model_loss, model_accuracy = model.evaluate(X_test_scaled, y_test_encoded, verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

7/7 - 0s - 4ms/step - accuracy: 0.7251 - loss: 0.5850
Loss: 0.5849823355674744, Accuracy: 0.7251184582710266


In [None]:


# Save the trained model
model.save("ISD_Accountability.keras")