In [50]:
import pandas as pd
import joblib
from sklearn.linear_model import LogisticRegression
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.pipeline import Pipeline
from tkinter import *
from tkinter import ttk, messagebox

df = pd.read_csv("film_script_success_data.csv")


X = df[['genre', 'sentiment_score', 'keyword_count', 'dialogue_length']]
y = df['success']


preprocessor = ColumnTransformer(transformers=[
    ('genre', OneHotEncoder(), ['genre'])
], remainder='passthrough')


model = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('classifier', LogisticRegression(random_state=42))
])
model.fit(X, y)


joblib.dump(model, "film_script_predictor.pkl")

root = Tk()
root.title("Film Script Success Predictor")
root.geometry("520x500")
root.config(bg="#f0fbff")


style = ttk.Style()
style.configure("TButton", font=("Segoe UI", 12))
style.configure("TLabel", font=("Segoe UI", 11), background="#f0fbff")


frame = Frame(root, padx=20, pady=20, bg="#f0fbff")
frame.pack(pady=10)


Label(frame, text="Genre:", bg="#f0fbff").grid(row=0, column=0, sticky=W, pady=5)
genre_var = StringVar()
genre_dropdown = ttk.Combobox(frame, textvariable=genre_var, 
                             values=sorted(df['genre'].unique()), 
                             state="readonly")
genre_dropdown.grid(row=0, column=1)
genre_dropdown.set(df['genre'].unique()[0])


Label(frame, text="Sentiment Score (-1 to 1):", bg="#f0fbff").grid(row=1, column=0, sticky=W, pady=5)
sentiment_entry = Entry(frame, width=10)
sentiment_entry.grid(row=1, column=1)
sentiment_entry.insert(0, "0.0")


Label(frame, text="Keyword Count:", bg="#f0fbff").grid(row=2, column=0, sticky=W, pady=5)
keyword_entry = Entry(frame, width=10)
keyword_entry.grid(row=2, column=1)
keyword_entry.insert(0, "25")


Label(frame, text="Dialogue Length:", bg="#f0fbff").grid(row=3, column=0, sticky=W, pady=5)
dialogue_entry = Entry(frame, width=10)
dialogue_entry.grid(row=3, column=1)
dialogue_entry.insert(0, "500")


def predict_success():
    try:
        
        genre = genre_var.get()
        sentiment = float(sentiment_entry.get())
        keywords = int(keyword_entry.get())
        dialogue = int(dialogue_entry.get())
        
        
        if not (-1 <= sentiment <= 1):
            messagebox.showerror("Input Error", "Sentiment score must be between -1 and 1")
            return
        if keywords <= 0 or dialogue <= 0:
            messagebox.showerror("Input Error", "Keyword count and dialogue length must be positive")
            return
            
        
        data = pd.DataFrame([[genre, sentiment, keywords, dialogue]], 
                           columns=['genre', 'sentiment_score', 'keyword_count', 'dialogue_length'])
        
        
        prediction = model.predict(data)[0]
        probability = model.predict_proba(data)[0][1]
        
        
        success_text = "Likely to Succeed!" if prediction == 1 else "May Not Succeed"
        result_label.config(
            text=f"Prediction: {success_text}\nSuccess Probability: {probability:.2%}",
            foreground="green" if prediction == 1 else "red"
        )
        
    except ValueError as ve:
        messagebox.showerror("Input Error", "Please enter valid numbers for all fields")
    except Exception as e:
        messagebox.showerror("Error", str(e))


predict_button = ttk.Button(root, text="Predict Success", command=predict_success)
predict_button.pack(pady=20)


result_label = Label(root, text="", font=("Segoe UI", 12), wraplength=420, bg="#f0fbff")
result_label.pack(pady=10)


root.mainloop()

STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
