In [None]:
# Colab-only setup: mount Google Drive into the runtime's filesystem.
from google.colab import drive
drive.mount('/content/drive')
# Switch the working directory to the project folder so that the relative
# 'dataset.csv' path used by later cells resolves inside Drive.
%cd /content/drive/MyDrive/Colab Notebooks/regresionlogistica1/

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
/content/drive/MyDrive/Colab Notebooks/regresionlogistica1


In [None]:
import pandas as pd
import numpy as np

# Generating random names
def generate_names(n):
    """Return a list of ``n`` random pseudo-names.

    Each name consists of 5 to 9 lowercase ASCII letters drawn with
    replacement, with the first letter upper-cased. All draws come from
    NumPy's global random state, so seed it beforehand for repeatable output.
    """
    alphabet = list('abcdefghijklmnopqrstuvwxyz')

    def _random_name():
        # randint's upper bound is exclusive, so lengths range over 5..9.
        length = np.random.randint(5, 10)
        letters = np.random.choice(alphabet, size=length)
        return ''.join(letters).capitalize()

    return [_random_name() for _ in range(n)]

# Generating random account types
def generate_account_types(n):
    """Return a NumPy array of ``n`` account types.

    Every entry is either 'ahorro' or 'corriente', sampled with replacement
    from NumPy's global random state.
    """
    account_kinds = ['ahorro', 'corriente']
    return np.random.choice(account_kinds, size=n)

# Assigning priorities based on account types and money
def assign_priorities(account_types, money_january, money_february, money_march):
    """Label each client 'high', 'low' or 'medium'.

    The four arguments are parallel sequences, one entry per client.

    * 'high'   -- a 'corriente' account whose balance strictly rose
                  January -> February -> March.
    * 'low'    -- an 'ahorro' account whose balance strictly fell
                  January -> February -> March.
    * 'medium' -- every other case (including ties between months).

    Returns a list of labels in input order.
    """
    def _classify(kind, jan, feb, mar):
        # Guard clauses: the two special cases first, 'medium' as the fallback.
        if kind == 'corriente' and jan < feb and feb < mar:
            return 'high'
        if kind == 'ahorro' and jan > feb and feb > mar:
            return 'low'
        return 'medium'

    rows = zip(account_types, money_january, money_february, money_march)
    return [_classify(*row) for row in rows]

# Generating random amounts of money for each month
def generate_money(n):
    """Return three integer arrays of length ``n``: January, February, March.

    Each balance starts as a random integer in [3000, 10000]. Afterwards,
    row by row and month by month, each value has a 30% chance of being
    shifted up or down by a random amount in [500, 2000], so final values
    can fall outside the initial range. All draws use NumPy's global random
    state.
    """
    # Initial balances; randint's upper bound (10001) is exclusive.
    january, february, march = (
        np.random.randint(3000, 10001, size=n) for _ in range(3)
    )

    def _maybe_shift(balances, row):
        """With probability 0.3, add or subtract 500..2000 at ``balances[row]``."""
        if np.random.random() >= 0.3:
            return
        sign = np.random.choice([-1, 1])
        balances[row] += sign * np.random.randint(500, 2001)

    # Visit months in January, February, March order for every row.
    for row in range(n):
        for balances in (january, february, march):
            _maybe_shift(balances, row)

    return january, february, march

# Generating dataset
# Seed NumPy's global random state before any draw so that the generated
# dataset (and every number derived from it later in the notebook) is the
# same on each Restart & Run All.
SEED = 42
np.random.seed(SEED)

n = 20000  # number of synthetic clients
names = generate_names(n)
account_types = generate_account_types(n)
money_january, money_february, money_march = generate_money(n)
# Priorities are derived deterministically from account type and balance trend.
priorities = assign_priorities(account_types, money_january, money_february, money_march)

# Creating DataFrame: one row per client, one column per generated attribute
data = {
    'Name': names,
    'Account_Type': account_types,
    'Money_January': money_january,
    'Money_February': money_february,
    'Money_March': money_march,
    'Priority': priorities
}

df = pd.DataFrame(data)

# Saving to CSV (relative path, so it lands in the current working directory)
df.to_csv('dataset.csv', index=False)
print(df)

            Name Account_Type  Money_January  Money_February  Money_March  \
0      Guvhqkndy       ahorro           5392            9406         3927   
1      Uwmpdhbde    corriente           6754            4496         5532   
2         Hlllul    corriente           7163            7341         9839   
3          Ychmk       ahorro           4290            7627         7649   
4        Awijaou       ahorro           3993            4333         9168   
...          ...          ...            ...             ...          ...   
19995  Nfyvadmix    corriente           4678            8987         8521   
19996  Axciaxlck       ahorro           9551            9449         8425   
19997  Wojmjyvka    corriente           9518            7319        11042   
19998      Yohze    corriente           7006            8698         3464   
19999  Hvqxzvzqu    corriente           5869           11279         9516   

      Priority  
0       medium  
1       medium  
2         high  
3      

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

In [None]:
# Load the dataset back from the CSV file in the current working directory.
# This rebinds `df`, so the cells below work from the file contents rather
# than from any in-memory frame created earlier.
df = pd.read_csv('dataset.csv')

In [None]:
# Encoding categorical variables
# Build the model inputs in a NEW frame instead of overwriting columns of
# `df`. Mapping in place is not idempotent: on a second run of this cell the
# already-encoded 0/1 values are not keys of the dict, so Series.map would
# turn the whole column into NaN.
account_type_codes = {'ahorro': 0, 'corriente': 1}

df_features = df.assign(
    Account_Type=df['Account_Type'].map(account_type_codes),
    # Binary feature: 1 when the balance strictly increased January -> February -> March
    Money_Increase=(
        (df['Money_February'] > df['Money_January'])
        & (df['Money_March'] > df['Money_February'])
    ).astype(int),
)

# Any account type missing from the mapping becomes NaN; fail here with a
# clear message rather than later inside the model fit.
assert df_features['Account_Type'].notna().all(), "Unexpected Account_Type value in dataset"

# Splitting the dataset into features and target variable
X = df_features[['Account_Type', 'Money_Increase']]
y = df_features['Priority']

In [None]:
# Splitting the dataset into training and testing sets
# 80% train / 20% test; random_state fixes the shuffle so the split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Creating and training the Logistic Regression model
# `multi_class` is deliberately not passed: 'auto' is already the default, and
# the parameter is deprecated in recent scikit-learn releases (passing it
# raises a FutureWarning and it is scheduled for removal).
# max_iter is raised to 1000 to give the lbfgs solver room to converge.
model = LogisticRegression(solver='lbfgs', max_iter=1000)
model.fit(X_train, y_train)

In [None]:
# Making predictions on the held-out test split
predictions = model.predict(X_test)

# Evaluating the model: fraction of test rows whose predicted label matches.
# NOTE(review): the features used here (account type and a strictly-increasing
# flag) do not encode the strictly-decreasing trend that defines 'low'
# priority, so accuracy alone may hide errors on that class -- consider
# inspecting per-class metrics.
accuracy = accuracy_score(y_test, predictions)
print("Accuracy:", accuracy)

Accuracy: 0.92025


In [None]:
# Predicting priority for a new client
# Suppose the new client has a 'corriente' account and their money increased over the three months
new_client_account = 1 # 1 for corriente, 0 for ahorro
new_client_money_increase = 1  # 1 if money increased, 0 otherwise

# The model was fitted on a DataFrame with named columns, so predict on a
# one-row DataFrame with the same column names (same order as in training).
# Passing a bare nested list makes scikit-learn warn about missing feature
# names and relies purely on positional order.
new_client = pd.DataFrame({
    'Account_Type': [new_client_account],
    'Money_Increase': [new_client_money_increase],
})
predicted_priority = model.predict(new_client)
print("Predicted priority for the new client:", predicted_priority)

Predicted priority for the new client: ['high']




# **This notebook generates a synthetic client dataset in which each priority ('high', 'medium' or 'low') is assigned from the account type and the three-month balance trend, and then trains a logistic regression model to classify those priorities based on the account type and an indicator of whether the money increased. Finally, it predicts the priority for a new client.**