In [None]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [None]:
# Toy corpus used to demonstrate tokenization and padding.
sentences = [
    "I love machine learning",
    "I love deep learning",
    "Machine learning loves data",
    "I love Ai",
]

In [None]:
# Build the word -> integer-index vocabulary from the toy corpus.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(texts=sentences)

In [None]:
# Encode every sentence as a list of vocabulary indices, then pad at the end
# so that all rows have the same length.
sequences = tokenizer.texts_to_sequences(texts=sentences)
padded = pad_sequences(sequences=sequences, padding="post")

In [None]:
# Show the word -> index mapping learned by the tokenizer.
print(tokenizer.word_index)

{'i': 1, 'love': 2, 'learning': 3, 'machine': 4, 'deep': 5, 'loves': 6, 'data': 7, 'ai': 8}


In [None]:
# Show the padded index matrix (one row per sentence).
print(padded)

[[1 2 4 3]
 [1 2 5 3]
 [4 3 6 7]
 [1 2 8 0]]


In [None]:
# Simple RNN: build a small sentiment classifier

In [None]:
# Tiny labelled corpus for the RNN demo; labels[i] belongs to sentences[i]
# (1 for the positive-sounding sentences, 0 for the negative-sounding ones).
sentences = [
    "I love this product",
    "This is a bad product",
    "Amazing experience",
    "Worst Experience",
]
labels = [1, 0, 1, 0]

# Fit a fresh tokenizer on this corpus (rebinds the names used in the
# earlier tokenization demo).
tokenizer = Tokenizer()
tokenizer.fit_on_texts(texts=sentences)

# Index-encode the sentences and pad them at the end to a common length.
sequences = tokenizer.texts_to_sequences(texts=sentences)
padded = pad_sequences(sequences=sequences, padding="post")

print(padded)

[[ 4  5  1  2  0]
 [ 1  6  7  8  2]
 [ 9  3  0  0  0]
 [10  3  0  0  0]]


In [None]:
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding,SimpleRNN,Dense

In [None]:
# Features are the padded index matrix; targets are the labels as a NumPy array.
x = padded
y = np.asarray(labels)

In [None]:
# Simple RNN sentiment classifier: Embedding -> SimpleRNN -> sigmoid output.
# The vocabulary size comes from the fitted tokenizer instead of a hard-coded
# guess; +1 because word indices start at 1 and index 0 is the padding value.
vocab_size = len(tokenizer.word_index) + 1

model = Sequential()
# mask_zero=True tells the recurrent layer to skip the trailing zeros added by
# pad_sequences(padding='post'), so the final state reflects the last real
# token rather than padding steps.
model.add(Embedding(input_dim=vocab_size, output_dim=16, mask_zero=True))
model.add(SimpleRNN(32))
model.add(Dense(1, activation='sigmoid'))

In [None]:
# Binary-classification training setup: Adam optimizer, binary cross-entropy
# loss, accuracy reported each epoch.
model.compile(
    loss="binary_crossentropy",
    optimizer="adam",
    metrics=["accuracy"],
)

In [None]:
# Train for 30 passes over (x, y).
model.fit(x=x, y=y, epochs=30)

Epoch 1/30
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step - accuracy: 0.2500 - loss: 0.7085
Epoch 2/30
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 49ms/step - accuracy: 0.2500 - loss: 0.6999
Epoch 3/30
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 50ms/step - accuracy: 0.5000 - loss: 0.6916
Epoch 4/30
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 62ms/step - accuracy: 0.5000 - loss: 0.6833
Epoch 5/30
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 53ms/step - accuracy: 0.7500 - loss: 0.6751
Epoch 6/30
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 49ms/step - accuracy: 0.7500 - loss: 0.6668
Epoch 7/30
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 49ms/step - accuracy: 0.7500 - loss: 0.6584
Epoch 8/30
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 54ms/step - accuracy: 0.7500 - loss: 0.6498
Epoch 9/30
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m

<keras.src.callbacks.history.History at 0x7f477c707e60>

In [None]:
# SimpleRNN suffers from the "vanishing gradients" problem, which makes it hard to learn long-range dependencies

In [None]:
# LSTM

In [None]:
from tensorflow.keras.layers import LSTM

# Same toy sentiment task, with an LSTM layer in place of the SimpleRNN.
# Vocabulary size is derived from the fitted tokenizer; +1 because word
# indices start at 1 and index 0 is the padding value.
vocab_size = len(tokenizer.word_index) + 1

model = Sequential()
# mask_zero=True makes the LSTM ignore the zero padding appended by
# pad_sequences(padding='post') instead of treating it as real input.
model.add(Embedding(input_dim=vocab_size, output_dim=64, mask_zero=True))
model.add(LSTM(64))
model.add(Dense(1, activation='sigmoid'))

model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy']
)

model.fit(x, y, epochs=30)

Epoch 1/30
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step - accuracy: 0.2500 - loss: 0.6940
Epoch 2/30
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 49ms/step - accuracy: 0.7500 - loss: 0.6920
Epoch 3/30
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 52ms/step - accuracy: 0.7500 - loss: 0.6900
Epoch 4/30
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 51ms/step - accuracy: 0.7500 - loss: 0.6879
Epoch 5/30
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 52ms/step - accuracy: 0.7500 - loss: 0.6858
Epoch 6/30
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 52ms/step - accuracy: 0.7500 - loss: 0.6836
Epoch 7/30
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 51ms/step - accuracy: 0.7500 - loss: 0.6812
Epoch 8/30
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 66ms/step - accuracy: 0.7500 - loss: 0.6787
Epoch 9/30
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m

<keras.src.callbacks.history.History at 0x7f47781ce600>

In [None]:
import pandas as pd

# Load the resume dataset and preview its first five rows.
df = pd.read_csv("Resume.csv")
df.head(n=5)

Unnamed: 0,Category,Resume
0,Data Science,Skills * Programming Languages: Python (pandas...
1,Data Science,Education Details \r\nMay 2013 to May 2017 B.E...
2,Data Science,"Areas of Interest Deep Learning, Control Syste..."
3,Data Science,Skills â¢ R â¢ Python â¢ SAP HANA â¢ Table...
4,Data Science,"Education Details \r\n MCA YMCAUST, Faridab..."


In [None]:
from sklearn.preprocessing import LabelEncoder
# Map each category name to an integer id. The fitted encoder is kept so that
# predicted ids can be translated back to category names later.
encoder = LabelEncoder()
encoder.fit(df['Category'])
df['Category_encoding'] = encoder.transform(df['Category'])

In [None]:
# Dimensions of the frame as (rows, columns).
df.shape

(962, 3)

In [None]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [None]:
# Tokenizer for the resume text: num_words=5000 caps the vocabulary used when
# encoding, and words outside it are replaced by the "<OOV>" token.
tokenizer = Tokenizer(oov_token="<OOV>", num_words=5000)
tokenizer.fit_on_texts(texts=df['Resume'])

In [None]:
# Encode each resume as vocabulary indices and force a fixed length of 300:
# shorter sequences are padded at the end, longer ones are cut by
# pad_sequences' default truncation rule.
sequences = tokenizer.texts_to_sequences(texts=df['Resume'])
padded = pad_sequences(sequences=sequences, maxlen=300, padding="post")

In [None]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the resumes for evaluation (fixed seed for reproducibility).
# Stratifying on the encoded label keeps the category proportions the same in
# the train and test parts, so no category is under-represented in either.
x_train, x_test, y_train, y_test = train_test_split(
    padded,
    df['Category_encoding'],
    test_size=0.2,
    random_state=56,
    stratify=df['Category_encoding'],
)

In [None]:
# Building a stacked LSTM model for resume classification

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding,SimpleRNN,Dense,Dropout

In [None]:
# Resume classifier: Embedding -> LSTM(128) -> LSTM(64) -> Dropout -> softmax.
model = Sequential()
# input_dim matches the tokenizer's num_words cap. mask_zero=True makes the
# LSTM layers skip the zero padding appended by pad_sequences(padding='post'),
# so short resumes are not summarized from a long run of padding steps.
model.add(Embedding(input_dim=5000, output_dim=128, mask_zero=True))
# return_sequences=True passes the full sequence of states to the next LSTM.
model.add(LSTM(128, return_sequences=True))
model.add(LSTM(64))
model.add(Dropout(0.5))
# One output unit per distinct category.
model.add(Dense(len(df['Category'].unique()), activation='softmax'))

In [None]:
# Multi-class training setup. The sparse categorical loss takes integer class
# ids as targets, which is what the label encoder produces.
model.compile(
    loss="sparse_categorical_crossentropy",
    optimizer="adam",
    metrics=["accuracy"],
)

In [None]:
# Print the layer-by-layer summary of the model.
model.summary()

In [None]:
# Train for 10 epochs in mini-batches of 32, reporting loss/accuracy on the
# held-out split after every epoch.
model.fit(
    x=x_train,
    y=y_train,
    validation_data=(x_test, y_test),
    batch_size=32,
    epochs=10,
)

Epoch 1/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 779ms/step - accuracy: 0.0905 - loss: 3.1900 - val_accuracy: 0.2280 - val_loss: 3.0846
Epoch 2/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 726ms/step - accuracy: 0.1795 - loss: 3.0337 - val_accuracy: 0.2021 - val_loss: 2.7574
Epoch 3/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 786ms/step - accuracy: 0.2314 - loss: 2.7211 - val_accuracy: 0.3212 - val_loss: 2.4426
Epoch 4/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 731ms/step - accuracy: 0.2947 - loss: 2.4272 - val_accuracy: 0.4145 - val_loss: 2.1674
Epoch 5/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 786ms/step - accuracy: 0.3797 - loss: 2.2008 - val_accuracy: 0.4767 - val_loss: 1.9776
Epoch 6/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 726ms/step - accuracy: 0.4412 - loss: 1.9907 - val_accuracy: 0.5026 - val_loss: 1.8606
Epoch 7/10
[1m25/25[

<keras.src.callbacks.history.History at 0x7f475af3ab70>

In [None]:
# One raw resume, wrapped in a list so it forms a single-document batch for
# the tokenizer and the model.
# NOTE(review): the text includes what look like personal contact details
# (name, e-mail address, phone number, LinkedIn handle) — consider
# anonymizing them before sharing or committing the notebook.
sample_resume = ["""Syed Hamza Faizan
Data Scientist
syed.hamza.faizan@gmail.com +91 8978838126
linkedin.com/in/syedhamzafaizan
PROFILE
Hyderabad, India
BE Information Technology graduate with hands-on experience across Machine Learning, Python
development, and Linux systems. Background includes US IT recruitment, Python development,
mentorship, and production exposure as a Linux System Administrator supporting RHEL 7→8 migrations,
OS patching validation, and secure infrastructure operations. Skilled in Python, SQL, ML/DL (TensorFlow,
PyTorch), Flask/FastAPI, with experience delivering predictive models, healthcare chatbots, and production
ready ML solutions, bridging model development and real-world deployment.
PROFESSIONAL EXPERIENCE
Linux System Admin
Freelance - US
•
Contributed to a large-scale RHEL migration project, upgrading servers from
RHEL 7 to RHEL 8, and provided ongoing support on Azure VMs, ensuring high
availability, minimal downtime, and application stability.
•
Successfully installed and configured security certificates, Licenses in
Development, Pre-Production, and Production environments.
•
Performed post-migration server decommissioning and validation using IBM SCG
CMP Console to safely turn off or decommission servers, while verifying
application functionality and data integrity.
•
Utilized PowerShell to perform administrative operations and automate routine
tasks on RHEL 8 servers, improving efficiency and consistency in server
management.
•
Managed file transfers and server access using PuTTY and WinSCP, supporting
system updates and deployments.
•
Monitored incidents and raised tickets using ServiceNow, ensuring timely
resolution of server or application issues.
Data Science Intern
Full Stack Academy
•
Worked with data science tools like Python, SQL, Exploratory Data Analysis (EDA),
and Machine Learning. I also performed hands on work on Deep Learning
projects.
Mentored students on machine learning project design and implementation
•
•
Gained experience in feature engineering, model selection, and performance
evaluation.
•
Developed data analysis and EDA projects. This involved cleaning, transforming,
and visualizing datasets to find useful insights.
Delivered hands-on Python, ML, and data analysis training to students
•
Information Technology Recruiter
Yonker Technologies
•
Managed and recruited technical talent for US-based IT clients, conducted
comprehensive screening and interviews
•
Specialized in bench sales for marketing and placing bench consultants with
multiple vendors and clients
•
Negotiated rates, managed consultant submissions, and arranged interview
schedules to maximize deal closures.
Intern
Datapoint IT & Hardware Pvt. Ltd.
Developed Django-based hospital portal for patient/doctor scheduling
•
Engineered backend databases and user-facing solutions
05/2025 – Present
Hyderabad, India
06/2025 – 09/2025
Hyderabad, India
04/2023 – 04/2025
Hyderabad, India
08/2023 – 09/2023
Hyderabad, India
•
EDUCATION
B.E Information Technology
Osmania University
Intermediate
MS Junior College
School
Success the school
PROJECTS
09/2021 – 08/2025
Hyderabad, India
06/2019 – 04/2021
Hyderabad, India
04/2019
Care Resale Value Prediction
•
Built a predictive model that estimates a car’s resale value based on factors like
brand, model, year, fuel type, mileage, and more.
•
Performed extensive EDA, data cleaning, and feature engineering to identify key
factors affecting price.
•
Explored and visualized the dataset to uncover patterns and relationships using
Python libraries like Pandas, NumPy, and Seaborn.
•
Tested and compared multiple machine learning models — including Linear
Regression, Random Forest, and XGBoost — to find the best fit for price
prediction.
Disease Diagnosis Using Chatbot
•
Developed a disease diagnostic chatbot that interacts with patients and provides
personalized recommendations.
•
Designed the system to accept patient queries and return relevant information,
such as doctor suggestions, diet plans, and disease explanations.
•
Implemented it using Python and included rule-based or machine learning logic
for symptom analysis and response generation.
Doctor and Patient Portal
•
Developed a doctor-patient portal that allows patients to register, view available
slots, and book appointments with doctors based on their availability.
•
Enabled doctors to view incoming appointment requests and accept or reject
them, streamlining scheduling and communication.
•
Implemented using Python and Django, managing backend databases for patient
and doctor records.
SKILLS
08/2025 – 09/2025
03/2025 – 05/2025
08/2024 – 11/2024
Technical Skills
Python
SQL
Machine Learning
Exploratory Data Analysis
Power BI
Deep Learning
Linux
PowerShell (server management)
Azure Virtual Machines (VMs)
Server Migration & Decommissioning
ServiceNow
SSL/TLS Certificate Management
COURSES
Data Science with AI
Full Stack Academy
02/2025 – 06/2025
Hyderabad, Indi"""]

# Apply the same preprocessing that was used for training: same fitted
# tokenizer, same fixed length (300) and same padding side.
seq = tokenizer.texts_to_sequences(sample_resume)
pad = pad_sequences(seq, maxlen=300, padding='post')

# One row of class probabilities per input document.
prediction = model.predict(pad)
# argmax along the class axis yields one class id per document. A flat
# argmax() would index into the flattened array and is only correct when
# exactly one document is passed.
predicted_category = encoder.inverse_transform(prediction.argmax(axis=1))

print("The predicted category is: ",predicted_category[0])

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 90ms/step
The predicted category is:  Database
