In [1]:
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

# Load the dataset
data = pd.read_csv('rec_dataset_hackathon.csv')

# Preprocess the data
# Replace missing values if any
data.fillna(0, inplace=True)

# Encode categorical variables
cat_features = ['User Type', 'Domain', 'Field', 'Usage', 'Timeline']
for feature in cat_features:
    encoder = LabelEncoder()
    data[feature] = encoder.fit_transform(data[feature])
data['Recproj'] = data['Recproj'].apply({'NLP':0, 'DATA':1,'CV':2, 'IMP':3, 'PRE':4}.get)

# Separate the features and target variable
X = data.drop('Recproj', axis=1)
y = data['Recproj']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train the decision tree classifier
model = DecisionTreeClassifier()
model.fit(X_train, y_train)

# User input function to get new data for prediction
def get_user_input():
    user_input = {}

    for feature in X.columns:
        if feature in cat_features:
            unique_values = data[feature].unique()
            options = ', '.join(str(value) for value in unique_values)
            print(f"Enter {feature} (Options: {options}): ")
            user_input[feature] = input()
        else:
            print(f"Enter {feature}: ")
            user_input[feature] = float(input())

    return pd.DataFrame(user_input, index=[0])


# Make predictions on new data
user_data = get_user_input()
user_data = user_data.astype(y.dtypes)

prediction = model.predict(user_data)

prediction_label = {0: 'NLP', 1: 'DATA', 2: 'CV', 3:'IMP', 4:'PRE'}.get(prediction[0])
print("Predicted Project:", prediction_label)

Enter User Type (Options: 0, 1): 
0
Enter Domain (Options: 3, 0, 4, 2, 1): 
2
Enter Field (Options: 3, 4, 2, 1, 0): 
2
Enter Usage (Options: 1, 0): 
1
Enter Timeline (Options: 2, 0, 1): 
1
Predicted Project: PRE


In [2]:
from sklearn.metrics import accuracy_score
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.823170731707317
