In [None]:
import pandas as pd
from sklearn.svm import SVC
from sklearn.metrics import silhouette_score
from sklearn.preprocessing import StandardScaler
from sklearn.manifold import TSNE

# Task 1: clustering
#
# NOTE(review): the model fitted below is an SVC trained on the "target"
# column, i.e. a supervised classifier, so the values stored in
# `train_cluster_labels` are predicted target classes rather than clusters
# discovered without labels. Confirm this is the intended approach.

# Load the training sheet and detach the label column from the features.
train_data = pd.read_excel("/content/train.xlsx")
target_variable = train_data.pop("target")  # pop() also removes "target" from train_data

# Standardise the features; `scaler` is reused later to transform new samples.
scaler = StandardScaler()
scaled_train_data = scaler.fit(train_data).transform(train_data)

# Author's notes: these hyperparameters came from a grid search, and K-means
# was tried beforehand but gave quite poor results on this dataset.
svc = SVC(kernel='rbf', C=0.1, gamma=0.1)

# Fit on the scaled features and immediately label the same training rows.
train_cluster_labels = svc.fit(scaled_train_data, target_variable).predict(scaled_train_data)

# Silhouette score of the predicted labels over the scaled training features.
# NOTE(review): this measures how well separated the label groups are, not
# classification accuracy, and it is computed on the rows the model was fitted on.
silhouette_score_train = silhouette_score(X=scaled_train_data, labels=train_cluster_labels)
print(f"Silhouette score (train): {silhouette_score_train:.4f}")






Silhouette score (train): 0.5791


In [None]:
# A single hand-written sample used to try out the fitted scaler and model.
new_point_readings = {
    "T1": -70, "T2": -59, "T3": -67, "T4": -58, "T5": -91, "T6": -99,
    "T7": -76, "T8": -54, "T9": -93, "T10": -72, "T11": -83, "T12": -54,
    "T13": -77, "T14": -65, "T15": -82, "T16": -88, "T17": -54, "T18": -76,
}
# One-row frame, row label 0, columns in the order the keys are written above.
new_data_point = pd.DataFrame([new_point_readings], index=[0])

# Apply the scaling that was fitted on the training features.
scaled_new_data_point = scaler.transform(new_data_point)

# Ask the fitted SVC for the label of the scaled sample.
predicted_cluster = svc.predict(scaled_new_data_point)

print(f"Predicted cluster for the new data point: {predicted_cluster[0]}")





Predicted cluster for the new data point: A39


In [None]:
# Task 2: classification

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score


# Fixed seed so the train/test split and the random forest are reproducible;
# without it the reported accuracy changes on every run.
RANDOM_STATE = 42

# Load the labelled data and separate the features from the label column.
data = pd.read_excel("/content/train.xlsx")
X = data.drop("target", axis=1)  # Features
y = data["target"]  # Target variable

# Hold out 20% of the rows for evaluation.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=RANDOM_STATE
)

# Train the Random Forest on the remaining 80%.
model = RandomForestClassifier(random_state=RANDOM_STATE)
model.fit(X_train, y_train)

# Predict the held-out rows.
y_pred = model.predict(X_test)

# This is accuracy on the held-out split, so it is labelled as test accuracy
# (the previous "Train accuracy" label was misleading).
accuracy = accuracy_score(y_test, y_pred)
print(f"Test accuracy: {accuracy:.4f}")




Train accuracy: 0.9894


In [None]:
# Author's note: other classifiers such as SVM were tried as well, but Random
# Forest scored the highest accuracy, so it is used for the final predictions.

# Load the rows that need a prediction.
test_data = pd.read_excel("/content/test.xlsx")

# Work on an independent copy of the features. Previously X_test was just
# another name for test_data, so adding the "predicted_target" column below
# also added it to X_test and any later model.predict(X_test) would have been
# handed one column too many.
# NOTE(review): this rebinds X_test, which an earlier cell pairs with y_test;
# after this cell the two no longer describe the same rows.
X_test = test_data.copy()

# Generating predictions with the fitted Random Forest
predicted_targets = model.predict(X_test)

# Store the predictions next to the input rows.
test_data["predicted_target"] = predicted_targets

print(test_data["predicted_target"])



0        B74
1        A10
2        B65
3        B20
4        A67
        ... 
15747    A71
15748    B11
15749    B69
15750    A38
15751    A80
Name: predicted_target, Length: 15752, dtype: object


In [None]:
# Write only the predicted labels to an Excel workbook (no index column) so
# the results can be shared easily.
predicted_target_column = test_data["predicted_target"]
predicted_target_column.to_excel(excel_writer="target_values.xlsx", index=False)

In [None]:
# Install Streamlit with the %pip magic so the package lands in the environment
# of the running kernel (a plain `!pip` may resolve to a different Python).
# NOTE(review): consider pinning a version for reproducible runs.
%pip install streamlit

In [None]:
# Install the localtunnel npm package; a later cell runs it through `npx localtunnel`.
!npm install localtunnel


In [None]:
%%writefile app.py

import streamlit as st
import pandas as pd


def _as_python_number(value):
    """Return a NumPy scalar as the equivalent built-in int/float; pass anything else through."""
    return value.item() if hasattr(value, "item") else value


df = pd.read_excel("/content/rawdata.xlsx")

# Sum the 'number' column per date, separately for 'picked' and 'placed' rows.
picking_counts = df[df['activity'] == 'picked'].groupby('date')['number'].sum()
placing_counts = df[df['activity'] == 'placed'].groupby('date')['number'].sum()

# Combine both series on their date index. A date that has only one of the two
# activities would otherwise show NaN in the other column, so report it as 0.
results = pd.DataFrame({'Picking': picking_counts, 'Placing': placing_counts}).fillna(0)

st.header("Datewise Picking/Placing Activity")

# Per-date table.
st.dataframe(results)

# Overall totals. pandas returns NumPy scalars from .sum(); they are converted
# to built-in numbers because st.metric documents plain int/float/str values.
st.metric(label="Total Picking Activities", value=_as_python_number(results['Picking'].sum()))
st.metric(label="Total Placing Activities", value=_as_python_number(results['Placing'].sum()))

Overwriting app.py


In [None]:
# Start the Streamlit app as a background job, sending its output to /content/logs.txt.
# NOTE(review): /content/app.py is presumably the file produced by the
# `%%writefile app.py` cell — that holds only if the notebook's working
# directory is /content; confirm.
!streamlit run /content/app.py &>/content/logs.txt &

In [None]:
# Open a localtunnel to local port 8501 in the background, then print this
# machine's public IPv4 address as reported by ipv4.icanhazip.com.
# NOTE(review): 8501 is presumably the port the Streamlit app is listening on — confirm.
# NOTE(review): the printed IP address and tunnel URL end up in the saved
# output; clear them before sharing the notebook.
!npx localtunnel --port 8501 & curl ipv4.icanhazip.com


35.243.162.147
[K[?25hnpx: installed 22 in 1.868s
your url is: https://late-planets-dance.loca.lt
