In [2]:
import numpy as np
import pandas as pd
import warnings
import pickle
warnings.filterwarnings("ignore")
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn import svm
from sklearn.metrics import accuracy_score

from prettytable import PrettyTable   
from astropy.table import Table, Column

In [2]:
sample_data = pd.read_csv("StudentsPerformance.csv")

print("\n\nSample Data:")
print("============\n")
pd.set_option("display.max_rows", None, "display.max_columns", None)
print(sample_data)



Sample Data:

     gender  race  parental level of education  lunch  \
0         0     2                            1      1   
1         1     5                            3      1   
2         0     4                            6      1   
3         0     2                            5      1   
4         1     3                            3      0   
5         0     5                            5      1   
6         0     4                            6      0   
7         0     2                            1      1   
8         1     1                            2      1   
9         1     2                            4      0   
10        1     3                            6      1   
11        0     3                            4      1   
12        1     4                            3      1   
13        1     1                            1      1   
14        1     4                            1      0   
15        0     2                            1      1   
16        0    

In [3]:
print("\n\nAttributes in Sample Data:")
print("==========================\n")

print(sample_data.columns)

print("\n\nNumber of Instances in Sample Data:",sample_data["gender"].count())
print("========================================\n")



Attributes in Sample Data:

Index(['gender', 'race', 'parental level of education', 'lunch',
       'test preparation course', 'math score', 'reading score',
       'writing score', 'result'],
      dtype='object')


Number of Instances in Sample Data: 1000



In [4]:
training_data_encoded, testing_data_encoded = train_test_split( sample_data , test_size=0.2 , random_state=0 , shuffle = False)

# Save the Training and Testing Data into CSV File 

training_data_encoded.to_csv(r'training-data-encoded.csv', index = False, header = True)
testing_data_encoded.to_csv(r'testing-data-encoded.csv', index = False, header = True)

# print Training and Testing Data

print("\n\nTraining Data:")
print("==============\n")
pd.set_option("display.max_rows", None, "display.max_columns", None)
print(training_data_encoded)
print("\n\nTesting Data:")
print("==============\n")
pd.set_option("display.max_rows", None, "display.max_columns", None)
print(testing_data_encoded)



Training Data:

     gender  race  parental level of education  lunch  \
0         0     2                            1      1   
1         1     5                            3      1   
2         0     4                            6      1   
3         0     2                            5      1   
4         1     3                            3      0   
5         0     5                            5      1   
6         0     4                            6      0   
7         0     2                            1      1   
8         1     1                            2      1   
9         1     2                            4      0   
10        1     3                            6      1   
11        0     3                            4      1   
12        1     4                            3      1   
13        1     1                            1      1   
14        1     4                            1      0   
15        0     2                            1      1   
16        0  

In [5]:
print("\n\nInputs Vectors (Feature Vectors) of Training Data:")
print("==================================================\n")
input_vector_train = training_data_encoded.iloc[: , :-1]
print(input_vector_train)

print("\n\nOutputs/Labels of Training Data:")
print("================================\n")
print("  Results")
output_label_train = training_data_encoded.iloc[: ,-1]
print(output_label_train)



Inputs Vectors (Feature Vectors) of Training Data:

     gender  race  parental level of education  lunch  \
0         0     2                            1      1   
1         1     5                            3      1   
2         0     4                            6      1   
3         0     2                            5      1   
4         1     3                            3      0   
5         0     5                            5      1   
6         0     4                            6      0   
7         0     2                            1      1   
8         1     1                            2      1   
9         1     2                            4      0   
10        1     3                            6      1   
11        0     3                            4      1   
12        1     4                            3      1   
13        1     1                            1      1   
14        1     4                            1      0   
15        0     2                 

In [6]:
print("\n\nTraining the Support Vector Classifier on Training Data")
print("========================================================\n")
print("\nParameters and their values:")
print("============================\n")
svc_model = svm.SVC(gamma='auto',random_state=0)
svc_model.fit(input_vector_train,np.ravel(output_label_train))
print(svc_model)



Training the Support Vector Classifier on Training Data


Parameters and their values:

SVC(gamma='auto', random_state=0)


In [7]:
pickle.dump(svc_model, open('svc_trained_model.pkl', 'wb'))

In [8]:
print("\n\nInputs Vectors (Feature Vectors) of Testing Data:")
print("=================================================\n")
input_vector_test = testing_data_encoded.iloc[: , :-1]
print(input_vector_test)

print("\n\nOutputs/Labels of Testing Data:")
print("==============================\n")
print("  Results")
output_label_test = testing_data_encoded.iloc[: ,-1]
print(output_label_test)



Inputs Vectors (Feature Vectors) of Testing Data:

     gender  race  parental level of education  lunch  \
800       1     3                            2      0   
801       1     4                            5      0   
802       1     1                            3      0   
803       1     4                            1      0   
804       0     3                            5      1   
805       1     2                            5      1   
806       0     3                            5      1   
807       1     1                            3      1   
808       1     5                            3      1   
809       1     3                            1      1   
810       0     2                            5      0   
811       0     4                            2      0   
812       0     2                            5      1   
813       1     2                            4      1   
814       1     4                            4      1   
815       0     3                  

In [9]:
model = pickle.load(open('svc_trained_model.pkl', 'rb'))

In [10]:
model_predictions = model.predict(input_vector_test)
testing_data_encoded.copy(deep=True)
pd.options.mode.chained_assignment = None
testing_data_encoded["Predicted Results"] = model_predictions

# Save the Predictions into CSV File

testing_data_encoded.to_csv(r'model-predictions.csv', index = False, header = True)

model_predictions = testing_data_encoded 
print("\n\nPredicted Results Returned by svc_trained_model:")
print("==========================================\n")
print(model_predictions)



Predicted Results Returned by svc_trained_model:

     gender  race  parental level of education  lunch  \
800       1     3                            2      0   
801       1     4                            5      0   
802       1     1                            3      0   
803       1     4                            1      0   
804       0     3                            5      1   
805       1     2                            5      1   
806       0     3                            5      1   
807       1     1                            3      1   
808       1     5                            3      1   
809       1     3                            1      1   
810       0     2                            5      0   
811       0     4                            2      0   
812       0     2                            5      1   
813       1     2                            4      1   
814       1     4                            4      1   
815       0     3                   

In [11]:
model_accuracy_score = accuracy_score(model_predictions["result"],model_predictions["Predicted Results"])

print("\n\nAccuracy Score:")
print("===============\n")
print(round(model_accuracy_score,2))



Accuracy Score:

0.76


In [None]:

gender_input = input("\nPlease enter Gender here (Male 1, Female 0) : ").strip()
race_input = input("\nPlease enter Race/ethnicity here (Group A,B,C,D,E  1,2,3,4,5) : ").strip()
parental_level_of_education_input = input("\nPlease enter parental level of education school - masters degree 1-6 : ").strip()
lunch_input = input("\nPlease enter lunch routine(standard, free) 1,0 here : ").strip()
test_preparation_course_input = input("\nPlease enter preparation here (0,1): ").strip()
math_score_input = input("\nPlease enter maths scores here : ").strip()
reading_score_input = input("\nPlease enter reading scores here : ").strip()
writing_score_input = input("\nPlease enter writing scores here : ").strip()



In [None]:
user_input = pd.DataFrame({ 'Gender': [gender_input],'Race': [race_input],'parental level of education': [parental_level_of_education_input],'Lunch': [lunch_input],'test preparation course': [test_preparation_course_input],'Math score': [math_score_input],'Reading score': [reading_score_input],'Writing score': [writing_score_input]})

print("\n\nUser Input Feature Vector:")
print("==========================\n")
print(user_input)

In [None]:
model = pickle.load(open('svc_trained_model.pkl', 'rb'))

In [19]:
predicted_Result = model.predict(user_input)

if(predicted_Result == 1): 
    prediction = "Student will pass"
if(predicted_Result == 0):
    prediction = "Student will fail"
pretty_table = PrettyTable()
pretty_table.add_column("       ** Prediction **       ",[prediction])
print(pretty_table)

+--------------------------------+
|        ** Prediction **        |
+--------------------------------+
|       Student will pass        |
+--------------------------------+


In [3]:
data = pd.read_csv("StudentsPerformance.csv")
data = np.array(data)

X = data[1:, 1:-1]
y = data[1:, -1]
y = y.astype('int')
X = X.astype('int')
# print(X,y)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)
log_reg = LogisticRegression()


log_reg.fit(X_train, y_train)

inputt=[int(x) for x in "1 4 1 1 0 7 8 9".split(' ')]
final=[np.array(inputt)]

b = log_reg.predict_proba(final)


pickle.dump(log_reg,open('model.pkl','wb'))
model=pickle.load(open('model.pkl','rb'))

ValueError: X has 8 features per sample; expecting 7

In [38]:
print(b)

[[1.00000000e+00 8.03032088e-29]]


In [39]:
output='{0:.{1}f}'.format(b[0][1], 2)

In [40]:
print(output)

0.00
