<a href="https://colab.research.google.com/github/Pulikonda3959/Data-analyst/blob/main/Edufund.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Import necessary libraries
import pandas as pd
import numpy as np

# Task 1.1: Create a synthetic dataset
np.random.seed(42)  # Setting seed for reproducibility

# Assuming 1000 users
num_users = 1000

# Generate synthetic data for user attributes and behaviors
user_ids = np.arange(1, num_users + 1)
age = np.random.randint(18, 60, num_users)
income = np.random.randint(30000, 100000, num_users)
goal_creation = np.random.choice([0, 1], size=num_users, p=[0.7, 0.3])
investment = np.random.choice([0, 1], size=num_users, p=[0.8, 0.2])

# Create a synthetic dataset
synthetic_data = pd.DataFrame({
    'user_id': user_ids,
    'age': age,
    'income': income,
    'goal_creation': goal_creation,
    'investment': investment
})

# Task 1.2: Create a data dictionary
data_dictionary = {
    'user_id': 'User ID',
    'age': 'User age',
    'income': 'User income',
    'goal_creation': '1 if user created a goal, 0 otherwise',
    'investment': '1 if user invested, 0 otherwise'
}

# Display the synthetic dataset and data dictionary
print("Synthetic Dataset:")
print(synthetic_data.head())

print("\nData Dictionary:")
for key, value in data_dictionary.items():
    print(f"{key}: {value}")


Synthetic Dataset:
   user_id  age  income  goal_creation  investment
0        1   56   44382              0           0
1        2   46   33756              0           0
2        3   32   50609              0           0
3        4   25   46478              0           0
4        5   38   69666              0           0

Data Dictionary:
user_id: User ID
age: User age
income: User income
goal_creation: 1 if user created a goal, 0 otherwise
investment: 1 if user invested, 0 otherwise


In [2]:
# Task 2.1: Analyze the synthetic data
# You can perform exploratory data analysis and visualization here

# Task 2.2: Develop predictive models
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# Split the data into features (X) and target variable (y)
X = synthetic_data[['age', 'income']]
y = synthetic_data['goal_creation']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize the RandomForestClassifier
model = RandomForestClassifier(random_state=42)

# Train the model
model.fit(X_train, y_train)

# Task 2.3: Evaluate model performance
# Make predictions on the test set
y_pred = model.predict(X_test)

# Evaluate accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f"\nModel Accuracy: {accuracy:.2f}")

# Display classification report
print("\nClassification Report:")
print(classification_report(y_test, y_pred))



Model Accuracy: 0.65

Classification Report:
              precision    recall  f1-score   support

           0       0.73      0.80      0.76       143
           1       0.34      0.26      0.30        57

    accuracy                           0.65       200
   macro avg       0.54      0.53      0.53       200
weighted avg       0.62      0.65      0.63       200



In [3]:
# Task 3: Create a strategy based on model results
# You can use the trained model to predict which users are likely to create goals and invest

# Example: Target users with predicted goal creation probability > 0.5 for goal creation
predicted_probabilities = model.predict_proba(X_test)[:, 1]
target_users = X_test[predicted_probabilities > 0.5]

# Display the targeted users
print("\nTargeted Users:")
print(target_users)



Targeted Users:
     age  income
411   43   52890
513   36   38712
938   36   82130
319   49   65400
210   23   96017
235   47   87134
101   25   70806
901   57   48752
299   45   92187
277   50   53510
879   36   83363
54    19   36783
542   46   65488
866   32   99249
70    57   54917
292   25   99346
289   47   57788
652   50   63397
998   25   72468
714   56   67208
327   20   59757
635   34   74965
644   23   97649
86    25   63536
294   44   46371
892   20   76479
260   55   99768
237   52   31341
583   45   40326
445   33   54860
800   31   93646
849   47   59371
120   43   47327
721   32   56984
281   25   74547
822   21   61910
321   50   73001
429   47   72207
767   59   89889
649   27   42588
314   46   64801
363   49   64701
355   33   58831
948   56   67685


In [7]:
# Task 4: Reporting and Presentation
# You can use Markdown cells in a Jupyter Notebook for this task
# Example Report (Markdown)
"""
## Data-Driven Strategy for User Engagement and Conversion

"""
### Task 1: Data Exploration and Understanding
#### Synthetic Dataset
# Display synthetic dataset
print(synthetic_data.head())


   user_id  age  income  goal_creation  investment
0        1   56   44382              0           0
1        2   46   33756              0           0
2        3   32   50609              0           0
3        4   25   46478              0           0
4        5   38   69666              0           0


In [8]:
# Display data dictionary
for key, value in data_dictionary.items():
    print(f"{key}: {value}")


user_id: User ID
age: User age
income: User income
goal_creation: 1 if user created a goal, 0 otherwise
investment: 1 if user invested, 0 otherwise


In [9]:
# Display model accuracy
print(f"Model Accuracy: {accuracy:.2f}")


Model Accuracy: 0.65


In [12]:
# Display targeted users
print(target_users)


     age  income
411   43   52890
513   36   38712
938   36   82130
319   49   65400
210   23   96017
235   47   87134
101   25   70806
901   57   48752
299   45   92187
277   50   53510
879   36   83363
54    19   36783
542   46   65488
866   32   99249
70    57   54917
292   25   99346
289   47   57788
652   50   63397
998   25   72468
714   56   67208
327   20   59757
635   34   74965
644   23   97649
86    25   63536
294   44   46371
892   20   76479
260   55   99768
237   52   31341
583   45   40326
445   33   54860
800   31   93646
849   47   59371
120   43   47327
721   32   56984
281   25   74547
822   21   61910
321   50   73001
429   47   72207
767   59   89889
649   27   42588
314   46   64801
363   49   64701
355   33   58831
948   56   67685
