In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import EarlyStopping
import tkinter as tk
from tkinter import ttk

# Load your dataset
file_path = 'startup_data.csv'
startup_data = pd.read_csv(file_path)

# Drop columns that are not used as features.
# (Missing values are not handled here; the imputers in the preprocessing
# pipelines below take care of them.)
startup_data = startup_data.drop(columns=['Unnamed: 0', 'Unnamed: 6', 'id', 'object_id', 'latitude', 'longitude', 'zip_code'])

# Separate features and target variable.
# NOTE(review): every remaining column except 'status' is fed to the model.
# If the CSV contains columns derived from the outcome, they would leak the
# target into the features and inflate the accuracy -- verify against the
# dataset schema.
X = startup_data.drop(columns=['status'])
y = startup_data['status']

# Encode the target variable: 1 for 'acquired', 0 for any other status.
# Vectorized comparison instead of a row-wise Python lambda.
y = (y == 'acquired').astype('int64')

# Identify categorical and numerical columns
categorical_cols = X.select_dtypes(include=['object']).columns
numerical_cols = X.select_dtypes(include=['int64', 'float64']).columns

# Preprocessing for numerical data: fill gaps with the median, then standardize
numerical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler())
])

# Preprocessing for categorical data: fill gaps with the most frequent value,
# then one-hot encode (categories unseen during fitting are encoded as all zeros)
categorical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(handle_unknown='ignore'))
])

# Combine preprocessing steps
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numerical_transformer, numerical_cols),
        ('cat', categorical_transformer, categorical_cols)
    ])

# Split the RAW data first, so that the imputation values, scaling statistics
# and one-hot categories are learned from the training rows only. Fitting the
# preprocessor on the full dataset would let held-out rows influence the
# features and bias the test evaluation.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit the preprocessor on the training rows, then apply it to the test rows
X_train = preprocessor.fit_transform(X_train_raw)
X_test = preprocessor.transform(X_test_raw)

# Kept for backward compatibility with code that expects the fully
# transformed feature matrix; not used for training or evaluation.
X_preprocessed = preprocessor.transform(X)

# Define the TensorFlow model
model = Sequential([
    Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
    Dense(32, activation='relu'),
    Dense(1, activation='sigmoid')
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Early stopping callback to prevent overfitting: stop after 5 epochs without
# improvement in validation loss and roll back to the best weights
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# Train the model (20% of the training rows are held out for validation)
history = model.fit(X_train, y_train, epochs=50, batch_size=32, validation_split=0.2, callbacks=[early_stopping])

# Evaluate the model on the untouched test rows
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {accuracy}")

# Function to predict startup success
def predict_startup_success(input_data):
    """
    Predict the success score of a startup based on input data.

    The caller's dictionary is not modified. Feature columns that are absent
    from ``input_data`` are filled with a per-column default where one is
    defined below, and with 0 otherwise. Keys that are not feature columns of
    the training data are ignored.

    Parameters:
    input_data (dict): Dictionary containing startup details, keyed by
        feature column name.

    Returns:
    float: Predicted success score (0 to 1).
    """
    # Default values for missing columns
    default_values = {
        'state_code': 'CA', 'funding_total_usd': 0, 'funding_rounds': 0, 'relationships': 0, 'milestones': 0, 'category_code': 'other'
    }

    # Build a fresh row (instead of writing defaults into the caller's dict)
    # holding exactly the columns of the original dataset, in the same order.
    row = {
        column: input_data.get(column, default_values.get(column, 0))
        for column in X.columns
    }

    # Convert the row to a single-row DataFrame
    input_df = pd.DataFrame([row])

    # Preprocess input data with the already-fitted preprocessor
    input_preprocessed = preprocessor.transform(input_df)

    # Make prediction; the model output holds one row with one sigmoid unit
    prediction = model.predict(input_preprocessed)

    # Return a plain Python float, as documented, rather than a NumPy scalar
    return float(prediction[0][0])

# GUI Implementation
def predict_and_display():
    """
    Read the form fields, run the prediction and show the result.

    Button callback: collects the values of the input widgets, passes them to
    ``predict_startup_success`` and writes the formatted score into
    ``result_var``. If a numeric field is empty or cannot be parsed, an error
    message is shown in ``result_var`` instead and no prediction is made.
    """
    try:
        # float()/int() raise ValueError on empty or non-numeric text
        input_data = {
            'state_code': state_code_var.get(),
            'funding_total_usd': float(funding_total_usd_var.get()),
            'funding_rounds': int(funding_rounds_var.get()),
            'relationships': int(relationships_var.get()),
            'milestones': int(milestones_var.get()),
            'category_code': category_code_var.get()
        }
    except ValueError:
        # Report the problem in the window instead of failing silently
        # inside the Tk callback.
        result_var.set(
            "Invalid input: Funding Total must be a number; Funding Rounds, "
            "Relationships and Milestones must be whole numbers."
        )
        return

    success_score = predict_startup_success(input_data)
    result_var.set(f"Predicted Success Score: {success_score:.2f}")

# Build the application window
root = tk.Tk()
root.title("Startup Success Prediction")


def _add_caption(caption, row):
    """Place a text label in the left column of the given grid row."""
    ttk.Label(root, text=caption).grid(column=0, row=row, padx=10, pady=5)


def _add_entry_row(caption, row):
    """Add a captioned free-text entry and return the variable backing it."""
    _add_caption(caption, row)
    text_var = tk.StringVar()
    ttk.Entry(root, textvariable=text_var).grid(column=1, row=row, padx=10, pady=5)
    return text_var


def _add_combobox_row(caption, row, source_column):
    """Add a captioned combobox offering the distinct non-null values of a dataset column."""
    _add_caption(caption, row)
    choice_var = tk.StringVar()
    box = ttk.Combobox(root, textvariable=choice_var)
    box['values'] = list(startup_data[source_column].dropna().unique())
    box.grid(column=1, row=row, padx=10, pady=5)
    return choice_var, box


# Input rows, top to bottom
state_code_var, state_code_combobox = _add_combobox_row("State Code:", 0, 'state_code')
funding_total_usd_var = _add_entry_row("Funding Total (USD):", 1)
funding_rounds_var = _add_entry_row("Funding Rounds:", 2)
relationships_var = _add_entry_row("Relationships:", 3)
milestones_var = _add_entry_row("Milestones:", 4)
category_code_var, category_code_combobox = _add_combobox_row("Category Code:", 5, 'category_code')

# Result line spanning both columns
result_var = tk.StringVar()
result_label = ttk.Label(root, textvariable=result_var)
result_label.grid(column=0, row=6, columnspan=2, padx=10, pady=5)

# Button that triggers the prediction
predict_button = ttk.Button(root, text="Predict Success", command=predict_and_display)
predict_button.grid(column=0, row=7, columnspan=2, padx=10, pady=5)

# Hand control to Tk's event loop (blocks until the window is closed)
root.mainloop()


Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


19/19 ━━━━━━━━━━━━━━━━━━━━ 1s 14ms/step - accuracy: 0.6612 - loss: 0.6605 - val_accuracy: 0.8378 - val_loss: 0.5467
Epoch 2/50
19/19 ━━━━━━━━━━━━━━━━━━━━ 0s 5ms/step - accuracy: 0.8933 - loss: 0.4681 - val_accuracy: 0.8986 - val_loss: 0.3624
Epoch 3/50
19/19 ━━━━━━━━━━━━━━━━━━━━ 0s 5ms/step - accuracy: 0.9397 - loss: 0.2600 - val_accuracy: 0.9527 - val_loss: 0.1958
Epoch 4/50
19/19 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step - accuracy: 0.9927 - loss: 0.0932 - val_accuracy: 0.9865 - val_loss: 0.0945
Epoch 5/50
19/19 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step - accuracy: 1.0000 - loss: 0.0369 - val_accuracy: 0.9865 - val_loss: 0.0522
Epoch 6/50
19/19 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step - accuracy: 1.0000 - loss: 0.0138 - val_accuracy: 0.9865 - val_loss: 0.0403
Epoch 7/50
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━