In [1]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import pandas as pd
import joblib

# Load the dataset (path is relative to the notebook's working directory)
dataset = pd.read_csv("input.csv")  # Replace with your actual dataset filename

# Integer codes for the 'Invested In' target categories
invested_in_mapping = {
    'Mutual Funds and Stocks': 0,
    'Government Schemes': 1,
    'Bank FDs': 2,
    'Private Bank Investment': 3
}

# Map 'Invested In' to numerical values.
# Series.map() yields NaN for every value that is not a key of the mapping, so
# validate the result here and fail with a message naming the offending labels
# instead of letting NaN targets reach model.fit().
invested_in_codes = dataset['Invested In'].map(invested_in_mapping)
unmapped = invested_in_codes.isna()
if unmapped.any():
    unknown_labels = sorted(
        dataset.loc[unmapped, 'Invested In'].dropna().astype(str).unique()
    )
    raise ValueError(
        f"{int(unmapped.sum())} row(s) have an 'Invested In' value that is missing "
        f"or not in invested_in_mapping; unrecognised labels: {unknown_labels}"
    )
dataset['Invested In'] = invested_in_codes

# Extract features (X) and target variable (y)
X = dataset[['Gender', 'Age', 'Salary', 'Amount To Be Invested', 'Number Of Children', 'Domain Of Expertise']]
y = dataset['Invested In']

# One-hot encode categorical features
X = pd.get_dummies(X, columns=['Gender', 'Domain Of Expertise'])

# Split the data into training and testing sets (80/20, fixed seed for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create and train the decision tree model
model = DecisionTreeClassifier(random_state=42)
model.fit(X_train, y_train)

# Evaluate model performance on the test set
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy on the test set: {accuracy}")

# Save the trained model.
# Only the fitted tree is persisted; the one-hot column layout of X is not stored with it.
model_filename = 'new_decision_tree_model.pkl'
joblib.dump(model, model_filename)


Accuracy on the test set: 0.3


['new_decision_tree_model.pkl']

In [2]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import joblib

# Column groups, split by the kind of preprocessing each one receives
numeric_features = ['Age', 'Salary', 'Amount To Be Invested', 'Number Of Children']
categorical_features = ['Gender', 'Domain Of Expertise']

# Read the raw data from the CSV (path is relative to the working directory)
df = pd.read_csv('input.csv')

# Model inputs and the label to predict
feature_columns = [
    'Gender',
    'Age',
    'Salary',
    'Amount To Be Invested',
    'Number Of Children',
    'Domain Of Expertise',
]
X = df[feature_columns]
y = df['Invested In']

# Hold out 20% of the rows for evaluation; the seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Numeric columns are standardised, categorical columns are one-hot encoded.
# handle_unknown='ignore' lets the encoder accept categories at predict time
# that were not present in the training rows.
numeric_transformer = Pipeline(steps=[('scaler', StandardScaler())])
categorical_transformer = Pipeline(
    steps=[('onehot', OneHotEncoder(handle_unknown='ignore'))]
)

# Route each column group to its transformer
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, numeric_features),
        ('cat', categorical_transformer, categorical_features),
    ]
)

# Full model: preprocessing followed by a seeded random forest
model = Pipeline(
    steps=[
        ('preprocessor', preprocessor),
        ('classifier', RandomForestClassifier(random_state=42)),
    ]
)
model.fit(X_train, y_train)

# Score the fitted pipeline on the held-out rows
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f'Model Accuracy: {accuracy}')

# Persist the whole pipeline (preprocessing + classifier) in one file.
# NOTE(review): the file name says "decisiontree" but the estimator is a random
# forest; the name is left unchanged in case something else loads it by this name.
joblib.dump(model, 'decisiontreefin.pkl')


Model Accuracy: 0.225


['decisiontreefin.pkl']

In [3]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import joblib

# Load the dataset (path is relative to the notebook's working directory)
data = pd.read_csv('input.csv')

# No cleaning or missing-value handling is performed; the CSV is used as loaded.

# Separate features and target variable: every column except 'Invested In' is a feature
X = data.drop(columns='Invested In')
y = data['Invested In']

# Encode categorical variables (one-hot)
X = pd.get_dummies(X)

# Split the data into training and testing sets (80/20, fixed seed)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train a Random Forest Classifier.
# random_state is fixed so that the reported accuracy and the saved model are
# reproducible across runs, matching the seeded models in the earlier cells.
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

# Make predictions on the test set
y_pred = model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f'Model Accuracy: {accuracy}')

# Save the model to a file
joblib.dump(model, 'investment_model.pkl')


Model Accuracy: 0.275


['investment_model.pkl']

In [4]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score
import joblib

# Load the dataset (path is relative to the notebook's working directory)
data = pd.read_csv('input.csv')

# No cleaning or missing-value handling is performed; the CSV is used as loaded.

# Encode categorical variables.
# Each column gets its own LabelEncoder, kept in `label_encoders`, so that every
# fitted string -> integer mapping stays available afterwards. Refitting a single
# encoder for all three columns would discard all but the last mapping.
label_encoders = {}
for column in ['Gender', 'Domain Of Expertise', 'Invested In']:
    label_encoders[column] = LabelEncoder()
    data[column] = label_encoders[column].fit_transform(data[column])

# `label_encoder` still refers to the encoder fitted on the target column,
# which is the state this name had at the end of the original cell.
label_encoder = label_encoders['Invested In']

# Separate features and target variable: every column except 'Invested In' is a feature
X = data.drop(columns='Invested In')
y = data['Invested In']

# Split the data into training and testing sets (80/20, fixed seed)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train a Random Forest Classifier.
# random_state is fixed so that the reported accuracy and the saved model are
# reproducible across runs.
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

# Make predictions on the test set
y_pred = model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f'Model Accuracy: {accuracy}')

# Save the model to a file.
# Only the classifier is persisted; the fitted encoders in `label_encoders` are not.
joblib.dump(model, 'investment_model1.pkl')


Model Accuracy: 0.3


['investment_model1.pkl']