In [45]:

# IMPORTANT: RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES
# TO THE CORRECT LOCATION (/kaggle/input) IN YOUR NOTEBOOK,
# THEN FEEL FREE TO DELETE THIS CELL.
# NOTE: THIS NOTEBOOK ENVIRONMENT DIFFERS FROM KAGGLE'S PYTHON
# ENVIRONMENT SO THERE MAY BE MISSING LIBRARIES USED BY YOUR
# NOTEBOOK.

import os
import sys
from tempfile import NamedTemporaryFile
from urllib.request import urlopen
from urllib.parse import unquote, urlparse
from urllib.error import HTTPError
from zipfile import ZipFile
import tarfile
import shutil

CHUNK_SIZE = 40960
DATA_SOURCE_MAPPING = 'student-career-prediction:https%3A%2F%2Fstorage.googleapis.com%2Fkaggle-data-sets%2F5627874%2F9295523%2Fbundle%2Farchive.zip%3FX-Goog-Algorithm%3DGOOG4-RSA-SHA256%26X-Goog-Credential%3Dgcp-kaggle-com%2540kaggle-161607.iam.gserviceaccount.com%252F20240917%252Fauto%252Fstorage%252Fgoog4_request%26X-Goog-Date%3D20240917T122559Z%26X-Goog-Expires%3D259200%26X-Goog-SignedHeaders%3Dhost%26X-Goog-Signature%3D3033a1c512fad5abea9a14567cf4afcb63f479746b3905d6163598447a5c37e7e253399cac6e8b9273798f5cd2350ad883ab0a284926817fd831a70108cbf3f02135eb45fd424efec26264a9994a671aeff2cf4f9557fe509d4ab75f6ec5084e2013091227841b8234477e3146419193e3511a1fe30beb64b8f4ecbd91275ef7fcd03f9f57667832e7655edfb08ab34cca09c435b65753451ac8506bf0c61224f6fa1ffd76d6524d23e5ec2d37a409e4d388efca4ea00419e40b2a0879234e313d03b751a8dd23d43ff05007c0635266ebacc95f8a6de33fe4bf36119194fb698a3155081ee1915c8d47738cf6fc3f9aa996fef9dea12528607bde9dd222694a'

KAGGLE_INPUT_PATH='/kaggle/input'
KAGGLE_WORKING_PATH='/kaggle/working'
KAGGLE_SYMLINK='kaggle'

!umount /kaggle/input/ 2> /dev/null
shutil.rmtree('/kaggle/input', ignore_errors=True)
os.makedirs(KAGGLE_INPUT_PATH, 0o777, exist_ok=True)
os.makedirs(KAGGLE_WORKING_PATH, 0o777, exist_ok=True)

try:
  os.symlink(KAGGLE_INPUT_PATH, os.path.join("..", 'input'), target_is_directory=True)
except FileExistsError:
  pass
try:
  os.symlink(KAGGLE_WORKING_PATH, os.path.join("..", 'working'), target_is_directory=True)
except FileExistsError:
  pass

for data_source_mapping in DATA_SOURCE_MAPPING.split(','):
    directory, download_url_encoded = data_source_mapping.split(':')
    download_url = unquote(download_url_encoded)
    filename = urlparse(download_url).path
    destination_path = os.path.join(KAGGLE_INPUT_PATH, directory)
    try:
        with urlopen(download_url) as fileres, NamedTemporaryFile() as tfile:
            total_length = fileres.headers['content-length']
            print(f'Downloading {directory}, {total_length} bytes compressed')
            dl = 0
            data = fileres.read(CHUNK_SIZE)
            while len(data) > 0:
                dl += len(data)
                tfile.write(data)
                done = int(50 * dl / int(total_length))
                sys.stdout.write(f"\r[{'=' * done}{' ' * (50-done)}] {dl} bytes downloaded")
                sys.stdout.flush()
                data = fileres.read(CHUNK_SIZE)
            if filename.endswith('.zip'):
              with ZipFile(tfile) as zfile:
                zfile.extractall(destination_path)
            else:
              with tarfile.open(tfile.name) as tarfile:
                tarfile.extractall(destination_path)
            print(f'\nDownloaded and uncompressed: {directory}')
    except HTTPError as e:
        print(f'Failed to load (likely expired) {download_url} to path {destination_path}')
        continue
    except OSError as e:
        print(f'Failed to load {download_url} to path {destination_path}')
        continue

print('Data source import complete.')


Downloading student-career-prediction, 21112 bytes compressed
Downloaded and uncompressed: student-career-prediction
Data source import complete.


# **upload data**

In [46]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [47]:
data = pd.read_csv('/kaggle/input/student-career-prediction/stud_training.csv')
data.head()

Unnamed: 0,Drawing,Dancing,Singing,Sports,Video Game,Acting,Travelling,Gardening,Animals,Photography,...,Engeeniering,Doctor,Pharmisist,Cycling,Knitting,Director,Journalism,Bussiness,Listening Music,Courses
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,1,0,BBA- Bachelor of Business Administration
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,1,0,BBA- Bachelor of Business Administration
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,1,0,BBA- Bachelor of Business Administration
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,BBA- Bachelor of Business Administration
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,1,0,BBA- Bachelor of Business Administration


In [48]:
testing_data =  pd.read_csv('/kaggle/input/student-career-prediction/stud_testing.csv')
testing_data.head()

Unnamed: 0,Drawing,Dancing,Singing,Sports,Video Game,Acting,Travelling,Gardening,Animals,Photography,...,Engeeniering,Doctor,Pharmisist,Cycling,Knitting,Director,Journalism,Bussiness,Listening Music,Courses
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,1,0,BBA- Bachelor of Business Administration
1,0,1,1,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,BEM- Bachelor of Event Management
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Integrated Law Course- BA + LL.B
3,0,0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,1,0,0,BJMC- Bachelor of Journalism and Mass Communic...
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,BFD- Bachelor of Fashion Designing


# **Preprocessing data**

In [49]:
data.shape

(3500, 60)

In [50]:
data.duplicated().sum()

3313

In [51]:
data.shape

(3500, 60)

In [52]:
data.drop_duplicates(inplace=True)

In [53]:
testing_data.shape

(35, 60)

In [54]:
data.isnull().sum()

Unnamed: 0,0
Drawing,0
Dancing,0
Singing,0
Sports,0
Video Game,0
Acting,0
Travelling,0
Gardening,0
Animals,0
Photography,0


In [55]:
data.columns

Index(['Drawing', 'Dancing', 'Singing', 'Sports', 'Video Game', 'Acting',
       'Travelling', 'Gardening', 'Animals', 'Photography', 'Teaching',
       'Exercise', 'Coding', 'Electricity Components', 'Mechanic Parts',
       'Computer Parts', 'Researching', 'Architecture', 'Historic Collection',
       'Botany', 'Zoology', 'Physics', 'Accounting', 'Economics', 'Sociology',
       'Geography', 'Psycology', 'History', 'Science', 'Bussiness Education',
       'Chemistry', 'Mathematics', 'Biology', 'Makeup', 'Designing',
       'Content writing', 'Crafting', 'Literature', 'Reading', 'Cartooning',
       'Debating', 'Asrtology', 'Hindi', 'French', 'English', 'Urdu',
       'Other Language', 'Solving Puzzles', 'Gymnastics', 'Yoga',
       'Engeeniering', 'Doctor', 'Pharmisist', 'Cycling', 'Knitting',
       'Director', 'Journalism', 'Bussiness', 'Listening Music', 'Courses'],
      dtype='object')

In [56]:
testing_data.columns

Index(['Drawing', 'Dancing', 'Singing', 'Sports', 'Video Game', 'Acting',
       'Travelling', 'Gardening', 'Animals', 'Photography', 'Teaching',
       'Exercise', 'Coding', 'Electricity Components', 'Mechanic Parts',
       'Computer Parts', 'Researching', 'Architecture', 'Historic Collection',
       'Botany', 'Zoology', 'Physics', 'Accounting', 'Economics', 'Sociology',
       'Geography', 'Psycology', 'History', 'Science', 'Bussiness Education',
       'Chemistry', 'Mathematics', 'Biology', 'Makeup', 'Designing',
       'Content writing', 'Crafting', 'Literature', 'Reading', 'Cartooning',
       'Debating', 'Asrtology', 'Hindi', 'French', 'English', 'Urdu',
       'Other Language', 'Solving Puzzles', 'Gymnastics', 'Yoga',
       'Engeeniering', 'Doctor', 'Pharmisist', 'Cycling', 'Knitting',
       'Director', 'Journalism', 'Bussiness', 'Listening Music', 'Courses'],
      dtype='object')

In [57]:
data['Sports'] = (data['Sports'] + data['Cycling'] + data['Gymnastics'] + data['Exercise'] + data['Dancing'] + data['Yoga']).replace([2, 3, 4, 5, 6] , 1)
data['Languages'] = (data['Hindi'] + data['Urdu'] + data['Other Language'] + data['French']).replace([2, 3, 4] , 1)
data['Animals'] = (data['Animals'] + data['Zoology']).replace([2] , 1)
data['Bussiness'] = (data['Bussiness Education'] + data['Bussiness']).replace([2] , 1)

data.drop(['Hindi', 'Urdu', 'Other Language', 'Cycling', 'Gymnastics', 'Exercise', 'Dancing' , 'Engeeniering' , 'Doctor' , 'Zoology', 'Yoga' , 'French', 'Makeup' , 'Bussiness Education'], axis=1, inplace=True)

In [58]:
testing_data['Sports'] = (testing_data['Sports'] + testing_data['Cycling'] + testing_data['Gymnastics'] + testing_data['Exercise'] + testing_data['Dancing'] + testing_data['Yoga']).replace([2, 3, 4, 5, 6] , 1)
testing_data['Languages'] = (testing_data['Hindi'] + testing_data['Urdu'] + testing_data['Other Language'] + testing_data['French']).replace([2, 3, 4] , 1)
testing_data['Animals'] = (testing_data['Animals'] + testing_data['Zoology']).replace([2] , 1)
testing_data['Bussiness'] = (testing_data['Bussiness Education'] + testing_data['Bussiness']).replace([2] , 1)

testing_data.drop(['Hindi', 'Urdu', 'Other Language', 'Cycling', 'Gymnastics', 'Exercise', 'Dancing' , 'Engeeniering' , 'Doctor' , 'Zoology' , 'Yoga' , 'French', 'Makeup' , 'Bussiness Education'], axis=1, inplace=True)

In [59]:
data.shape

(187, 47)

In [60]:
testing_data.shape

(35, 47)

In [61]:
data.columns

Index(['Drawing', 'Singing', 'Sports', 'Video Game', 'Acting', 'Travelling',
       'Gardening', 'Animals', 'Photography', 'Teaching', 'Coding',
       'Electricity Components', 'Mechanic Parts', 'Computer Parts',
       'Researching', 'Architecture', 'Historic Collection', 'Botany',
       'Physics', 'Accounting', 'Economics', 'Sociology', 'Geography',
       'Psycology', 'History', 'Science', 'Chemistry', 'Mathematics',
       'Biology', 'Designing', 'Content writing', 'Crafting', 'Literature',
       'Reading', 'Cartooning', 'Debating', 'Asrtology', 'English',
       'Solving Puzzles', 'Pharmisist', 'Knitting', 'Director', 'Journalism',
       'Bussiness', 'Listening Music', 'Courses', 'Languages'],
      dtype='object')

In [62]:
new_order = ['Drawing', 'Singing', 'Sports', 'Video Game', 'Acting', 'Travelling',
       'Gardening', 'Animals', 'Photography', 'Teaching', 'Coding',
       'Electricity Components', 'Mechanic Parts', 'Computer Parts',
       'Researching', 'Architecture', 'Historic Collection', 'Botany',
       'Physics', 'Accounting', 'Economics', 'Sociology', 'Geography',
       'Psycology', 'History', 'Science', 'Chemistry', 'Mathematics',
       'Biology', 'Designing', 'Content writing', 'Crafting', 'Literature',
       'Reading', 'Cartooning', 'Debating', 'Asrtology', 'English',
       'Solving Puzzles', 'Pharmisist', 'Knitting', 'Director', 'Journalism',
       'Bussiness', 'Listening Music', 'Languages', 'Courses']

data = data[new_order]
testing_data = testing_data[new_order]
data.head()

Unnamed: 0,Drawing,Singing,Sports,Video Game,Acting,Travelling,Gardening,Animals,Photography,Teaching,...,English,Solving Puzzles,Pharmisist,Knitting,Director,Journalism,Bussiness,Listening Music,Languages,Courses
0,0,0,0,0,0,0,0,0,0,0,...,1,0,0,0,1,0,1,0,0,BBA- Bachelor of Business Administration
1,0,0,0,0,0,0,0,0,0,0,...,0,1,0,0,1,0,1,0,0,BBA- Bachelor of Business Administration
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,1,0,0,BBA- Bachelor of Business Administration
3,0,0,0,0,0,0,0,0,0,0,...,1,1,0,0,0,0,1,0,0,BBA- Bachelor of Business Administration
4,0,0,0,0,0,0,0,0,0,0,...,1,1,0,0,1,0,1,0,0,BBA- Bachelor of Business Administration


In [63]:
data.columns

Index(['Drawing', 'Singing', 'Sports', 'Video Game', 'Acting', 'Travelling',
       'Gardening', 'Animals', 'Photography', 'Teaching', 'Coding',
       'Electricity Components', 'Mechanic Parts', 'Computer Parts',
       'Researching', 'Architecture', 'Historic Collection', 'Botany',
       'Physics', 'Accounting', 'Economics', 'Sociology', 'Geography',
       'Psycology', 'History', 'Science', 'Chemistry', 'Mathematics',
       'Biology', 'Designing', 'Content writing', 'Crafting', 'Literature',
       'Reading', 'Cartooning', 'Debating', 'Asrtology', 'English',
       'Solving Puzzles', 'Pharmisist', 'Knitting', 'Director', 'Journalism',
       'Bussiness', 'Listening Music', 'Languages', 'Courses'],
      dtype='object')

In [64]:
from sklearn.preprocessing import LabelEncoder

# Create the encoder
encoder = LabelEncoder()

data['Courses'] = encoder.fit_transform(data['Courses'])
testing_data['Courses'] = encoder.transform(testing_data['Courses'])


In [65]:
data.to_csv('data.csv', index=False)
testing_data.to_csv('testing_data.csv', index=False)

# **Model**

In [66]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score, f1_score , accuracy_score , confusion_matrix


In [67]:
X = data.drop(['Courses'], axis = 1)
y = data['Courses']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

X_testing = testing_data.drop(['Courses'], axis = 1)
y_testing = testing_data['Courses']

In [68]:
#X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)


Random Forest

In [69]:
from sklearn.ensemble import RandomForestClassifier

In [70]:
model1 = RandomForestClassifier()

# training model
model1.fit(X_train , y_train)

# Prediction
y_pred1 = model1.predict(X_test)

In [71]:


accuracy1 = accuracy_score(y_pred1, y_test)
print(f'accuracy: {accuracy1}')

# Precision, Recall, F1 Score
precision1 = precision_score(y_test, y_pred1 , average='macro')
recall1 = recall_score(y_test, y_pred1,average='macro')
f11 = f1_score(y_test, y_pred1,average='macro')


print(f'precision: {precision1}')
print(f'Recall: {recall1}')
print(f'F1 Score: {f11}')

accuracy: 0.9210526315789473
precision: 0.9304347826086956
Recall: 0.9565217391304348
F1 Score: 0.9378881987577639


  _warn_prf(average, modifier, msg_start, len(result))


In [72]:
pred_testing = model1.predict(X_testing)
accuracy_testing = accuracy_score(pred_testing, y_testing)
accuracy_testing

0.9714285714285714

Decision Tree

In [73]:
from sklearn.tree import DecisionTreeClassifier

dt_classifier = DecisionTreeClassifier(random_state=42)
dt_classifier.fit(X_train, y_train)
y_pred2 = dt_classifier.predict(X_test)
accuracy2 = accuracy_score(y_test, y_pred2)

print(f'Accuracy: {accuracy2}')


# Precision, Recall, F1 Score
precision2 = precision_score(y_test, y_pred2,average='macro')
recall2 = recall_score(y_test, y_pred2,average='macro')
f12 = f1_score(y_test, y_pred2,average='macro')


print(f'Precision: {precision2}')
print(f'Recall: {recall2}')
print(f'F1 Score: {f12}')



Accuracy: 0.7105263157894737
Precision: 0.636
Recall: 0.6666666666666665
F1 Score: 0.6281904761904762


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [74]:
pred_testing = dt_classifier.predict(X_testing)
accuracy_testing = accuracy_score(pred_testing, y_testing)
accuracy_testing

0.9714285714285714

XGB

In [75]:
from xgboost import XGBClassifier

model2 = XGBClassifier()
model2.fit(X_train, y_train)
y_pred3 = model2.predict(X_test)


accuracy3 = accuracy_score(y_test, y_pred3)

print(f'Accuracy: {accuracy3}')

precision3 = precision_score(y_test, y_pred3,average='micro')
recall3 = recall_score(y_test, y_pred3,average='micro')
f13 = f1_score(y_test, y_pred3,average='micro')

# Print individual scores
print(f'Precision: {precision3}')
print(f'Recall: {recall3}')
print(f'F1 Score: {f13}')

Accuracy: 0.7894736842105263
Precision: 0.7894736842105263
Recall: 0.7894736842105263
F1 Score: 0.7894736842105263


In [76]:
y_pred3

array([ 3, 30,  1, 29, 34, 33,  5, 32, 24, 16, 32, 28, 31, 20,  1, 33, 31,
       16,  4, 25, 14, 12, 19, 30, 25, 32, 24, 29, 15, 34, 25, 17, 30,  5,
        5, 27,  6, 30])

# **Saving the Model**

In [77]:
# Save the models for future use
import joblib

joblib.dump(model1, 'career_prediction_model.joblib')

['career_prediction_model.joblib']

In [78]:
# Save the model as plk file
import pickle
filename = 'career_prediction_model.pkl'
pickle.dump(model1, open(filename, 'wb'))



*   List item
*   List item



# Functions

In [79]:
def career(pred):
  careers = ['BBA- Bachelor of Business Administration',
       'BEM- Bachelor of Event Management',
       'Integrated Law Course- BA + LL.B',
       'BJMC- Bachelor of Journalism and Mass Communication',
       'BFD- Bachelor of Fashion Designing',
       'BBS- Bachelor of Business Studies',
       'BTTM- Bachelor of Travel and Tourism Management',
       'BVA- Bachelor of Visual Arts', 'BA in History',
       'B.Arch- Bachelor of Architecture',
       'BCA- Bachelor of Computer Applications',
       'B.Sc.- Information Technology', 'B.Sc- Nursing',
       'BPharma- Bachelor of Pharmacy', 'BDS- Bachelor of Dental Surgery',
       'Animation, Graphics and Multimedia', 'B.Sc- Applied Geology',
       'B.Sc.- Physics', 'B.Sc. Chemistry', 'B.Sc. Mathematics',
       'B.Tech.-Civil Engineering',
       'B.Tech.-Computer Science and Engineering',
       'B.Tech.-Electronics and Communication Engineering',
       'B.Tech.-Electrical and Electronics Engineering',
       'B.Tech.-Mechanical Engineering', 'B.Com- Bachelor of Commerce',
       'BA in Economics', 'CA- Chartered Accountancy',
       'CS- Company Secretary', 'Diploma in Dramatic Arts', 'MBBS',
       'Civil Services', 'BA in English', 'BA in Hindi', 'B.Ed.']
  result = []
  for i in list(pred):
       result.append(careers[i])

       return result

In [80]:
# from flask import Flask, request, jsonify
# import joblib
# import pandas as pd

# # Initialize Flask app
# app = Flask(__name__)

# # Load the saved model
# model = joblib.load('career_prediction_model.joblib')

# # Function to return career names from prediction indices
# def career(pred):
#     careers = ['BBA- Bachelor of Business Administration',
#            'BEM- Bachelor of Event Management',
#            'Integrated Law Course- BA + LL.B',
#            'BJMC- Bachelor of Journalism and Mass Communication',
#            'BFD- Bachelor of Fashion Designing',
#            'BBS- Bachelor of Business Studies',
#            'BTTM- Bachelor of Travel and Tourism Management',
#            'BVA- Bachelor of Visual Arts', 'BA in History',
#            'B.Arch- Bachelor of Architecture',
#            'BCA- Bachelor of Computer Applications',
#            'B.Sc.- Information Technology', 'B.Sc- Nursing',
#            'BPharma- Bachelor of Pharmacy', 'BDS- Bachelor of Dental Surgery',
#            'Animation, Graphics and Multimedia', 'B.Sc- Applied Geology',
#            'B.Sc.- Physics', 'B.Sc. Chemistry', 'B.Sc. Mathematics',
#            'B.Tech.-Civil Engineering',
#            'B.Tech.-Computer Science and Engineering',
#            'B.Tech.-Electronics and Communication Engineering',
#            'B.Tech.-Electrical and Electronics Engineering',
#            'B.Tech.-Mechanical Engineering', 'B.Com- Bachelor of Commerce',
#            'BA in Economics', 'CA- Chartered Accountancy',
#            'CS- Company Secretary', 'Diploma in Dramatic Arts', 'MBBS',
#            'Civil Services', 'BA in English', 'BA in Hindi', 'B.Ed.']
#     result = []
#     for i in list(pred):
#         result.append(careers[i])
#     return result

# # Define API endpoint for career prediction
# @app.route('/predict', methods=['POST'])
# def predict():
#     # Get the input data from the POST request
#     input_data = request.json

#     # Convert the input data into a DataFrame
#     input_df = pd.DataFrame([input_data])

#     # Perform prediction
#     prediction = model.predict(input_df)

#     # Get the career result based on the prediction
#     career_result = career(prediction)

#     # Return the result as a JSON response
#     return jsonify({'prediction': career_result})

# # Start the Flask app
# if __name__ == '__main__':
#     app.run(debug=True)


In [81]:
# import streamlit as st
# import numpy as np
# import pickle

# # # Load the pre-trained model
# # with open('./career_prediction_model.pkl', 'rb') as f:
# #     model = pickle.load(f)
# model = joblib.load('career_prediction_model.joblib')
# # Main title of the website
# st.title('اختيار التخصص الأنسب')

# # Description
# st.write("أجب عن الأسئلة التالية لتحديد التخصص أو الكلية الأنسب لك")

# # Questions (You can modify the questions based on your project)
# coding = st.radio('هل تستمتع بالبرمجة؟', ('نعم', 'لا'))
# math = st.radio('هل تجد متعة في حل المعادلات الرياضية؟', ('نعم', 'لا'))
# design = st.radio('هل تستمتع بالتصميم الجرافيكي أو الفنون؟', ('نعم', 'لا'))
# science = st.radio('هل تهتم بالعلوم الطبيعية مثل الفيزياء أو الكيمياء؟', ('نعم', 'لا'))
# languages = st.radio('هل تستمتع بدراسة اللغات الأجنبية؟', ('نعم', 'لا'))
# business = st.radio('هل تفضل العمل في مجال الاقتصاد أو إدارة الأعمال؟', ('نعم', 'لا'))

# # Convert the answers to numeric values (1 for Yes, 0 for No)
# answers = [
#     1 if coding == 'نعم' else 0,
#     1 if math == 'نعم' else 0,
#     1 if design == 'نعم' else 0,
#     1 if science == 'نعم' else 0,
#     1 if languages == 'نعم' else 0,
#     1 if business == 'نعم' else 0,
#     0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
#     0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
#     0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
#     0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
#     0, 0, 0, 0
# ]

# # When the button is clicked
# if st.button('احصل على التوصية'):
#     # Convert the answers into a numpy array as the model expects it
#     user_input = np.array(answers).reshape(1, -1)

#     # Use the model to predict
#     prediction = model.predict(user_input)

#     # Display the recommendation to the user
#     st.write(f'The most suitable major for you is: {prediction[0]}')


In [82]:
# pip install streamlit

In [83]:
# from flask import Flask, request, jsonify, render_template
# import joblib
# import pandas as pd

# def career(pred):
#   careers = ['BBA- Bachelor of Business Administration',
#        'BEM- Bachelor of Event Management',
#        'Integrated Law Course- BA + LL.B',
#        'BJMC- Bachelor of Journalism and Mass Communication',
#        'BFD- Bachelor of Fashion Designing',
#        'BBS- Bachelor of Business Studies',
#        'BTTM- Bachelor of Travel and Tourism Management',
#        'BVA- Bachelor of Visual Arts', 'BA in History',
#        'B.Arch- Bachelor of Architecture',
#        'BCA- Bachelor of Computer Applications',
#        'B.Sc.- Information Technology', 'B.Sc- Nursing',
#        'BPharma- Bachelor of Pharmacy', 'BDS- Bachelor of Dental Surgery',
#        'Animation, Graphics and Multimedia', 'B.Sc- Applied Geology',
#        'B.Sc.- Physics', 'B.Sc. Chemistry', 'B.Sc. Mathematics',
#        'B.Tech.-Civil Engineering',
#        'B.Tech.-Computer Science and Engineering',
#        'B.Tech.-Electronics and Communication Engineering',
#        'B.Tech.-Electrical and Electronics Engineering',
#        'B.Tech.-Mechanical Engineering', 'B.Com- Bachelor of Commerce',
#        'BA in Economics', 'CA- Chartered Accountancy',
#        'CS- Company Secretary', 'Diploma in Dramatic Arts', 'MBBS',
#        'Civil Services', 'BA in English', 'BA in Hindi', 'B.Ed.']
#   result = []
#   for i in list(pred):
#        result.append(careers[i])

#        return result

# # Initialize Flask app
# app = Flask(__name__)

# # Load the saved model
# model = joblib.load('career_prediction_model.joblib')

# @app.route('/')
# def home():
#     return '''
#     <h1>Welcome to the Career Recommendation System</h1>
#     <form action="/predict" method="post">
#         <label for="feature1">Feature 1:</label>
#         <input type="text" id="feature1" name="feature1" required><br><br>

#         <label for="feature2">Feature 2:</label>
#         <input type="text" id="feature2" name="feature2" required><br><br>

#         <!-- Add more features as needed -->

#         <input type="submit" value="Submit">
#     </form>
#     '''

# # Define API endpoint for career prediction
# @app.route('/predict', methods=['POST'])
# def predict():
#     try:
#         # Get the input data from the POST request
#         input_data = request.form.to_dict()

#         # Convert binary choice to numerical format if needed by the model
#         binary_choice = input_data.get('binary_choice')
#         if binary_choice == 'true':
#             input_data['binary_choice'] = 1
#         elif binary_choice == 'false':
#             input_data['binary_choice'] = 0
#         else:
#             return jsonify({'error': 'Invalid binary choice value'}), 400

#         # Convert the input data into a DataFrame
#         input_df = pd.DataFrame([input_data])

#         # Perform prediction
#         prediction = model.predict(input_df)

#         # Get the career result based on the prediction
#         career_result = career(prediction)

#         # Return the result as a JSON response
#         return jsonify({'prediction': career_result})

#     except Exception as e:
#         return jsonify({'error': str(e)}), 500

# # Start the Flask app
# if __name__ == '__main__':
#     app.run(debug=True)
