# 1. MACHINE LEARNING - CLUSTERING

In [1]:
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
import numpy as np
from collections import Counter

In [2]:
# Read the training set and preview the first rows.
train_csv_path = "C:\\Users\\sanka\\Downloads\\RAI_train.csv"
df = pd.read_csv(train_csv_path)
df.head()

Unnamed: 0,T1,T2,T3,T4,T5,T6,T7,T8,T9,T10,T11,T12,T13,T14,T15,T16,T17,T18,target
0,-70,-61,-66,-53,-51,-63,-82,-57,-76,-78,-66,-66,-61,-59,-73,-75,-63,-77,B37
1,-77,-74,-71,-76,-65,-63,-66,-52,-55,-75,-72,-75,-74,-61,-64,-63,-53,-63,B61
2,-53,-38,-55,-66,-62,-62,-65,-70,-62,-52,-56,-53,-66,-68,-72,-60,-68,-77,A19
3,-72,-62,-59,-65,-65,-65,-78,-82,-83,-59,-84,-60,-64,-83,-69,-72,-95,-73,A22
4,-67,-69,-65,-63,-59,-53,-70,-72,-71,-60,-61,-57,-54,-76,-61,-66,-71,-80,A33


In [3]:
# Build the feature matrix from the T1..T18 readings only.
# 'Unnamed: 0' is the row index written into the CSV (see the df.head() preview);
# leaving it in makes the scaler expect 19 columns while the prediction prompt
# collects 18, and it carries no signal. 'cluster' is added to df by the KMeans
# cell, so it is excluded as well to keep this cell safe to re-run.
# errors='ignore' lets the drop succeed when a listed column is absent.
non_feature_columns = ['target', 'Unnamed: 0', 'cluster']
X = df.drop(columns=non_feature_columns, errors='ignore')
y = df['target']

# Standardise each feature so no single reading dominates the distance metric.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [4]:
# Cluster the standardised features into k groups.
# n_init is pinned explicitly: the library default changed between scikit-learn
# releases, so relying on it gives version-dependent clusters (and a
# FutureWarning on some versions). 10 restarts matches the older default.
k = 3
kmeans = KMeans(n_clusters=k, n_init=10, random_state=42)
kmeans.fit(X_scaled)

# Store each row's cluster id next to its target so predict_target can look up
# the dominant target per cluster.
df['cluster'] = kmeans.labels_

In [6]:
def predict_target(new_data):
    """Predict a target label for one sample via its nearest KMeans cluster.

    The sample is standardised with the fitted ``scaler``, assigned to a
    cluster by the fitted ``kmeans`` model, and the most frequent ``target``
    among the training rows of that cluster is returned.

    Parameters
    ----------
    new_data : sequence of float
        Feature values in the same order as the columns the scaler was fit on.

    Returns
    -------
    The most common ``target`` value in the sample's cluster.
    """
    row = [list(new_data)]

    # When the scaler was fit on a DataFrame it remembers the column names;
    # rebuilding a one-row frame with those names keeps transform() consistent
    # with fit() and avoids the "X does not have valid feature names" warning.
    feature_names = getattr(scaler, "feature_names_in_", None)
    if feature_names is not None:
        sample = pd.DataFrame(row, columns=list(feature_names))
    else:
        sample = row

    new_data_scaled = scaler.transform(sample)
    cluster = kmeans.predict(new_data_scaled)[0]

    # Single .loc lookup instead of chained indexing.
    cluster_targets = df.loc[df['cluster'] == cluster, 'target']
    most_common_target = Counter(cluster_targets).most_common(1)[0][0]
    return most_common_target

# Collect one reading per feature (T1..T18) from the user, then classify it.
print("Enter the values for T1 to T18:")
new_data_point = [float(input(f"T{i}: ")) for i in range(1, 19)]

predicted_target = predict_target(new_data_point)
print("Predicted target:", predicted_target)


Enter the values for T1 to T18:
T1: -76
T2: -83
T3: -70
T4: -66
T5: -64
T6: -72
T7: -64
T8: -9
T9: -60
T10: -76
T11: -83
T12: -78
T13: -81
T14: -81
T15: -81
T16: -70
T17: -60
T18: -60
Predicted target: B58




# 2. MACHINE LEARNING - CLASSIFICATION

In [7]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split

In [9]:
data = pd.read_csv('C:\\Users\\sanka\\Downloads\\RAI_train.csv')

# Features are the T1..T18 readings only. The CSV also carries its row index as
# an 'Unnamed: 0' column; keeping it would feed the classifiers a meaningless
# row number and make the interactive prompt ask for it too.
# errors='ignore' keeps this working if the index column is absent.
X = data.drop(columns=['target', 'Unnamed: 0'], errors='ignore')
y = data['target']

In [10]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [11]:
# Random Forest: fit on the training split, then score BOTH splits.
# Training metrics alone only show how well the forest fits data it has already
# seen; the held-out test metrics are the ones that indicate predictive quality.
rf_clf = RandomForestClassifier(random_state=42)
rf_clf.fit(X_train, y_train)

rf_train_pred = rf_clf.predict(X_train)
rf_test_pred = rf_clf.predict(X_test)
rf_train_accuracy = accuracy_score(y_train, rf_train_pred)
rf_test_accuracy = accuracy_score(y_test, rf_test_pred)

print("\nRF - Classification Report (Training):")
print(classification_report(y_train, rf_train_pred))

print(f"RF - Accuracy: train={rf_train_accuracy:.4f}, test={rf_test_accuracy:.4f}")
print("\nRF - Classification Report (Test):")
# zero_division=0 avoids warnings for classes that are never predicted on the
# smaller test split.
print(classification_report(y_test, rf_test_pred, zero_division=0))


RF - Classification Report (Training):
              precision    recall  f1-score   support

          A1       1.00      1.00      1.00       176
         A10       1.00      1.00      1.00       169
         A11       1.00      1.00      1.00       171
         A12       1.00      1.00      1.00       166
         A13       1.00      1.00      1.00       178
         A14       1.00      1.00      1.00       341
         A15       1.00      1.00      1.00       327
         A16       1.00      1.00      1.00       172
         A17       1.00      1.00      1.00       173
         A18       1.00      1.00      1.00       157
         A19       1.00      1.00      1.00       168
          A2       1.00      1.00      1.00       165
         A20       1.00      1.00      1.00       160
         A21       1.00      1.00      1.00       335
         A22       1.00      1.00      1.00       172
         A23       1.00      1.00      1.00       165
         A24       1.00      1.00      1.

In [12]:
# SVM: fit on the training split, then score BOTH splits.
# Reporting only training metrics hides how the model behaves on unseen rows,
# so the held-out test accuracy and report are printed as well.
svm_clf = SVC(random_state=42)
svm_clf.fit(X_train, y_train)

svm_train_pred = svm_clf.predict(X_train)
svm_test_pred = svm_clf.predict(X_test)
svm_train_accuracy = accuracy_score(y_train, svm_train_pred)
svm_test_accuracy = accuracy_score(y_test, svm_test_pred)

print("\nSVM - Classification Report (Training):")
print(classification_report(y_train, svm_train_pred))

print(f"SVM - Accuracy: train={svm_train_accuracy:.4f}, test={svm_test_accuracy:.4f}")
print("\nSVM - Classification Report (Test):")
# zero_division=0 avoids warnings for classes that are never predicted on the
# smaller test split.
print(classification_report(y_test, svm_test_pred, zero_division=0))


SVM - Classification Report (Training):
              precision    recall  f1-score   support

          A1       0.96      0.94      0.95       176
         A10       0.80      1.00      0.89       169
         A11       0.93      1.00      0.97       171
         A12       1.00      1.00      1.00       166
         A13       1.00      1.00      1.00       178
         A14       1.00      1.00      1.00       341
         A15       0.99      0.96      0.98       327
         A16       1.00      1.00      1.00       172
         A17       1.00      0.98      0.99       173
         A18       0.99      1.00      1.00       157
         A19       0.99      0.99      0.99       168
          A2       1.00      1.00      1.00       165
         A20       1.00      1.00      1.00       160
         A21       0.94      0.99      0.97       335
         A22       1.00      1.00      1.00       172
         A23       1.00      1.00      1.00       165
         A24       1.00      1.00      1

In [13]:
def get_user_input_and_predict():
    """Prompt for one value per training feature and print both models' predictions.

    The prompts follow the column order of ``X_train``. The entered values are
    wrapped in a single-row DataFrame with the same column names and passed to
    the fitted ``rf_clf`` and ``svm_clf`` models. Returns nothing; the
    predictions are printed.
    """
    feature_names = X_train.columns
    entered = [float(input(f"Enter value for {name}: ")) for name in feature_names]

    # One-row frame with the training column names, so both models receive
    # input shaped like the data they were fit on.
    sample = pd.DataFrame([entered], columns=feature_names)

    rf_label = rf_clf.predict(sample)[0]
    svm_label = svm_clf.predict(sample)[0]

    print(f"Random Forest Prediction: {rf_label}")
    print(f"SVM Prediction: {svm_label}")

# Run at least one prediction, then repeat only while the user answers "yes"
# (case-insensitive, surrounding whitespace ignored).
keep_going = True
while keep_going:
    get_user_input_and_predict()
    cont = input("Do you want to enter another set of values? (yes/no): ").strip().lower()
    keep_going = cont == 'yes'


Enter value for T1: -76
Enter value for T2: -83
Enter value for T3: -70
Enter value for T4: -66
Enter value for T5: -64
Enter value for T6: -72
Enter value for T7: -64
Enter value for T8: -69
Enter value for T9: -60
Enter value for T10: -76
Enter value for T11: -83
Enter value for T12: -78
Enter value for T13: -81
Enter value for T14: -81
Enter value for T15: -81
Enter value for T16: -70
Enter value for T17: -60
Enter value for T18: -60
Random Forest Prediction: B74
SVM Prediction: B74
Do you want to enter another set of values? (yes/no): no


# 5. TEXT SUMMARIZATION

In [14]:
import fitz  
from sumy.parsers.plaintext import PlaintextParser
from sumy.nlp.tokenizers import Tokenizer
from sumy.summarizers.lex_rank import LexRankSummarizer

In [15]:
def extract_text_from_pdf(pdf_path):
    """Return the concatenated text of every page in a PDF.

    Parameters
    ----------
    pdf_path : str
        Path of the PDF file to open with PyMuPDF (``fitz``).

    Returns
    -------
    str
        The text of all pages joined in page order, with no separator added.
    """
    # Open the document as a context manager so the file handle is released
    # even if text extraction fails part-way through.
    with fitz.open(pdf_path) as doc:
        # join() builds the result in one pass instead of repeated string +=.
        return "".join(page.get_text() for page in doc)

def summarize_text(text, sentence_count=5):
    """Produce an extractive summary of ``text`` using sumy's LexRank summarizer.

    Parameters
    ----------
    text : str
        The text to summarise; it is parsed with the "english" tokenizer.
    sentence_count : int, optional
        Number of sentences requested from the summarizer (default 5).

    Returns
    -------
    str
        The selected sentences, each converted with ``str`` and joined by
        single spaces.
    """
    tokenizer = Tokenizer("english")
    parsed = PlaintextParser.from_string(text, tokenizer)
    lex_rank = LexRankSummarizer()
    chosen = lex_rank(parsed.document, sentence_count)
    return " ".join(map(str, chosen))

# Inputs for this run: which PDF to read and how many sentences to keep.
pdf_path = "C:\\Users\\sanka\\Downloads\\RAI\\Operations Management.pdf"
summary_length = 5

# Pull the raw text out of the PDF.
pdf_text = extract_text_from_pdf(pdf_path)
print("PDF text extracted.")

# Summarise and show the result under a heading line.
summary = summarize_text(pdf_text, sentence_count=summary_length)
print("Summary:", summary, sep="\n")


PDF text extracted.
Summary:
Functions of Operations Management The fundamental role of the operations management is to ensure that it understands the whole operation process for an effective management of the activities that are involved. Considerations such as the manner in which the management will supply the products of the organization, the level of expertise that will be required in the production processes, the levels of flexibilities that will be involved in operations among others are made prior to establishment of strategies. The satisfaction of customers can be realized from two perspectives: costs of the good or service offered to the customer and the delivery of such products in time as demanded for by the customers. Objectives are then made and pursued on how best customers can be kept satisfied with the organization’s products and at the same time, resources into productions are organized at cost that are reduced to minimum possible levels. The demand for better service 