In [1]:
!pip install pyperclip



# Auto_ML_System (First Version)

In [28]:
import pandas as pd
import numpy as np
import tkinter as tk
from tkinter import filedialog, messagebox, scrolledtext
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.metrics import accuracy_score, mean_squared_error
import matplotlib.pyplot as plt
import seaborn as sns
import pyperclip  # To copy text to clipboard

class AutoMLApp:
    """Tkinter front end for a small AutoML workflow.

    The user loads a CSV or Excel file and names a target column. The app then
    fills/drops missing values, label-encodes text columns, trains two
    candidate models on every remaining column, reports the better one, plots
    its predictions and shows an equivalent training script that can be copied
    to the clipboard.
    """

    def __init__(self, root):
        """Build the window widgets and initialise the analysis state.

        Args:
            root: The Tk window that hosts the widgets.
        """
        self.root = root
        self.root.title("AutoML Application")
        self.root.geometry("700x500")

        self.dataset = None
        self.target = None
        self.model = None
        self.problem_type = None
        self.generated_code = ""
        # Columns that preprocess_data() has label-encoded. Remembered so a
        # text target is still recognised as categorical after it has been
        # replaced by integer codes.
        self.encoded_columns = set()

        # UI Components
        tk.Label(root, text="AutoML System", font=("Arial", 20)).pack(pady=10)
        tk.Button(root, text="Import Dataset", command=self.import_dataset, width=20).pack(pady=10)
        tk.Button(root, text="Start Analysis", command=self.start_analysis, width=20).pack(pady=10)
        tk.Button(root, text="Copy Generated Code", command=self.copy_code, width=20).pack(pady=10)
        tk.Button(root, text="Quit", command=root.quit, width=20).pack(pady=10)

        self.code_display = scrolledtext.ScrolledText(root, wrap=tk.WORD, width=80, height=10)
        self.code_display.pack(pady=10)

    def import_dataset(self):
        """Ask for a CSV/Excel file and load it into ``self.dataset``.

        Shows an error dialog when no file is chosen or when reading fails; the
        previously loaded dataset (if any) is kept in both cases.
        """
        file_path = filedialog.askopenfilename(filetypes=[("CSV files", ".csv"), ("Excel files", ".xlsx")])
        if not file_path:
            messagebox.showerror("Error", "No file selected!")
            return

        try:
            # Compare case-insensitively so "DATA.CSV" is not handed to the
            # Excel reader.
            if file_path.lower().endswith('.csv'):
                dataset = pd.read_csv(file_path)
            else:
                dataset = pd.read_excel(file_path)
        except Exception as e:
            messagebox.showerror("Error", f"Failed to load dataset: {e}")
            return

        self.dataset = dataset
        # A fresh dataset has not been encoded yet.
        self.encoded_columns = set()
        messagebox.showinfo("Success", "Dataset loaded successfully!")
        print("Dataset Head:\n", self.dataset.head())

    def start_analysis(self):
        """Prompt for the target column, then preprocess and train.

        Any exception raised while preprocessing or training is reported in an
        error dialog and then re-raised, so the traceback is still available.
        """
        if self.dataset is None:
            messagebox.showerror("Error", "No dataset loaded!")
            return

        # "import tkinter" does not load its submodules, so tk.simpledialog
        # only resolves when some other import happened to pull it in.
        from tkinter import simpledialog

        # str() each label: column labels are not guaranteed to be strings.
        column_list = ', '.join(map(str, self.dataset.columns))

        # Select target variable
        answer = simpledialog.askstring("Input", f"Choose Target Variable:\n{column_list}")
        if answer is not None and answer not in self.dataset.columns:
            # Tolerate stray whitespace around an otherwise valid name.
            answer = answer.strip()
        self.target = answer
        if self.target not in self.dataset.columns:
            messagebox.showerror("Error", "Invalid target variable!")
            return

        # Preprocess and train models
        try:
            self.preprocess_data()
            self.train_models()
        except Exception as exc:
            # GUI boundary: tell the user, then let Tk report the traceback.
            messagebox.showerror("Error", f"Analysis failed: {exc}")
            raise

    def preprocess_data(self):
        """Impute, drop and label-encode ``self.dataset`` in place.

        Numeric gaps are filled with the column mean, rows that still contain
        missing values are dropped, and every object-dtype column is replaced
        by integer codes from ``LabelEncoder``. Encoded column names are added
        to ``self.encoded_columns``.
        """
        # Handle missing values
        self.dataset.fillna(self.dataset.mean(numeric_only=True), inplace=True)
        self.dataset.dropna(inplace=True)

        # Encode categorical variables
        categorical_cols = list(self.dataset.select_dtypes(include=['object']).columns)
        for col in categorical_cols:
            self.dataset[col] = LabelEncoder().fit_transform(self.dataset[col])
        self.encoded_columns.update(categorical_cols)

        print("Data Preprocessing Complete!")
        # DataFrame.info() prints its report itself and returns None, so it is
        # not wrapped in print().
        self.dataset.info()

    @staticmethod
    def _infer_problem_type(y, was_categorical=False):
        """Return ``"classification"`` or ``"regression"`` for target ``y``.

        Args:
            y: Target values; only ``y.nunique()`` is used.
            was_categorical: True when the target was a text column before
                label encoding. Such a target is a classification problem
                however many classes it has.
        """
        if was_categorical or y.nunique() <= 2:
            return "classification"
        return "regression"

    def train_models(self):
        """Train the candidate models and keep the best one in ``self.model``.

        Uses a 75/25 train/test split. Classification candidates are compared
        by accuracy (higher is better), regression candidates by mean squared
        error (lower is better). Afterwards the result is announced, the
        training script is generated and the predictions are plotted.
        """
        # Split data
        X = self.dataset.drop(columns=[self.target])
        y = self.dataset[self.target]
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

        # Determine problem type
        self.problem_type = self._infer_problem_type(y, self.target in self.encoded_columns)
        if self.problem_type == "classification":
            models = {
                "Logistic Regression": LogisticRegression(max_iter=1000),
                "Random Forest Classifier": RandomForestClassifier(),
            }
            metric = accuracy_score
            higher_is_better = True
        else:
            models = {
                "Linear Regression": LinearRegression(),
                "Random Forest Regressor": RandomForestRegressor(),
            }
            metric = mean_squared_error
            higher_is_better = False

        # Train and evaluate models. The first candidate is always accepted, so
        # best_model can never stay None (e.g. if a score comes out as NaN).
        best_model, best_score = None, None
        for name, model in models.items():
            model.fit(X_train, y_train)
            score = metric(y_test, model.predict(X_test))
            print(f"{name} Score: {score}")

            if best_score is None:
                improved = True
            elif higher_is_better:
                improved = score > best_score
            else:
                improved = score < best_score
            if improved:
                best_model, best_score = model, score

        self.model = best_model
        messagebox.showinfo("Best Model", f"Best Model: {type(best_model).__name__} with Score: {best_score}")
        self.generate_code(X_train, y_train)
        self.visualize_results(X_test, y_test)

    def visualize_results(self, X_test, y_test):
        """Plot the selected model's predictions against the held-out data.

        Regression: actual and predicted values as two scatter series.
        Classification: a heatmap of the actual-vs-predicted cross-tabulation.
        """
        y_pred = self.model.predict(X_test)

        if self.problem_type == "regression":
            plt.figure(figsize=(10, 6))
            plt.scatter(range(len(y_test)), y_test, color='blue', label="Actual")
            plt.scatter(range(len(y_pred)), y_pred, color='red', label="Predicted")
            plt.legend()
            plt.title("Actual vs Predicted")
            plt.show()
        else:
            sns.heatmap(pd.crosstab(y_test, y_pred), annot=True, cmap="Blues", fmt="d")
            plt.title("Confusion Matrix")
            plt.show()

    def _render_code(self):
        """Return the training script for the selected model as a string.

        Reads ``self.model``, ``self.target`` and ``self.problem_type``.
        """
        model_cls = type(self.model)
        # Estimators live in private submodules (e.g. "sklearn.ensemble._forest")
        # but are imported from the public package, so drop the underscore
        # parts instead of hard-coding "sklearn.linear_model".
        public_module = ".".join(
            part for part in model_cls.__module__.split(".") if not part.startswith("_")
        )
        if self.problem_type == "classification":
            metric_call = "accuracy_score(y_test, y_pred)"
        else:
            metric_call = "mean_squared_error(y_test, y_pred)"

        lines = [
            "",
            "# Generated ML Code",
            "import pandas as pd",
            "from sklearn.model_selection import train_test_split",
            f"from {public_module} import {model_cls.__name__}",
            "from sklearn.metrics import accuracy_score, mean_squared_error",
            "",
            "# Load dataset",
            '# dataset = pd.read_csv("your_dataset.csv")',
            "# NOTE: handle missing values and encode text columns before training.",
            "",
            "# Preprocessing",
            f"X = dataset.drop(columns=[{self.target!r}])",
            f"y = dataset[{self.target!r}]",
            "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)",
            "",
            "# Train model",
            # repr() of the estimator keeps its non-default settings
            # (e.g. max_iter=1000) in the generated constructor call.
            f"model = {self.model!r}",
            "model.fit(X_train, y_train)",
            "",
            "# Predict",
            "y_pred = model.predict(X_test)",
            "",
            "# Evaluate",
            f"score = {metric_call}",
            'print("Model Score:", score)',
        ]
        return "\n".join(lines) + "\n"

    def generate_code(self, X_train, y_train):
        """Render the training script and show it in the text area.

        Args:
            X_train: Unused; kept for interface compatibility.
            y_train: Unused; kept for interface compatibility.
        """
        # Generate ML training code
        self.generated_code = self._render_code()
        self.code_display.delete("1.0", tk.END)
        self.code_display.insert(tk.END, self.generated_code)

    def copy_code(self):
        """Copy the generated script to the clipboard, if there is one."""
        if self.generated_code:
            pyperclip.copy(self.generated_code)
            messagebox.showinfo("Copied", "Generated code copied to clipboard!")
        else:
            messagebox.showerror("Error", "No code generated yet!")

# Run the App
if __name__ == "__main__":
    # Create the Tk root window, let AutoMLApp attach its widgets to it,
    # then hand control to the Tk event loop. The app's Quit button is wired
    # to root.quit, which is what ends this loop.
    root = tk.Tk()
    app = AutoMLApp(root)
    root.mainloop()

# Auto_ML_System (Second Version)

In [30]:
import pandas as pd
import numpy as np
import tkinter as tk
from tkinter import filedialog, messagebox, scrolledtext
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.metrics import accuracy_score, mean_squared_error
import matplotlib.pyplot as plt
import seaborn as sns
import pyperclip  # To copy text to clipboard

class AutoMLApp:
    """Tkinter front end for a small AutoML workflow with feature selection.

    The user loads a CSV or Excel file, names a target column and a
    comma-separated list of independent variables. The app then fills/drops
    missing values, label-encodes text columns, trains two candidate models on
    the chosen features, reports the better one, plots its predictions and
    shows an equivalent training script that can be copied to the clipboard.
    """

    def __init__(self, root):
        """Build the window widgets and initialise the analysis state.

        Args:
            root: The Tk window that hosts the widgets.
        """
        self.root = root
        self.root.title("AutoML Application")
        self.root.geometry("700x500")

        self.dataset = None
        self.target = None
        self.independent_vars = None
        self.model = None
        self.problem_type = None
        self.generated_code = ""
        # Columns that preprocess_data() has label-encoded. Remembered so a
        # text target is still recognised as categorical after it has been
        # replaced by integer codes.
        self.encoded_columns = set()

        # UI Components
        tk.Label(root, text="AutoML System", font=("Arial", 20)).pack(pady=10)
        tk.Button(root, text="Import Dataset", command=self.import_dataset, width=20).pack(pady=10)
        tk.Button(root, text="Start Analysis", command=self.start_analysis, width=20).pack(pady=10)
        tk.Button(root, text="Copy Generated Code", command=self.copy_code, width=20).pack(pady=10)
        tk.Button(root, text="Quit", command=root.quit, width=20).pack(pady=10)

        self.code_display = scrolledtext.ScrolledText(root, wrap=tk.WORD, width=80, height=10)
        self.code_display.pack(pady=10)

    def import_dataset(self):
        """Ask for a CSV/Excel file and load it into ``self.dataset``.

        Shows an error dialog when no file is chosen or when reading fails; the
        previously loaded dataset (if any) is kept in both cases.
        """
        file_path = filedialog.askopenfilename(filetypes=[("CSV files", ".csv"), ("Excel files", ".xlsx")])
        if not file_path:
            messagebox.showerror("Error", "No file selected!")
            return

        try:
            # Compare case-insensitively so "DATA.CSV" is not handed to the
            # Excel reader.
            if file_path.lower().endswith('.csv'):
                dataset = pd.read_csv(file_path)
            else:
                dataset = pd.read_excel(file_path)
        except Exception as e:
            messagebox.showerror("Error", f"Failed to load dataset: {e}")
            return

        self.dataset = dataset
        # A fresh dataset has not been encoded yet.
        self.encoded_columns = set()
        messagebox.showinfo("Success", "Dataset loaded successfully!")
        print("Dataset Head:\n", self.dataset.head())

    @staticmethod
    def _parse_variable_list(text):
        """Split a comma-separated answer into unique, stripped names.

        Empty entries (e.g. from a trailing comma) are ignored and duplicates
        are dropped while keeping first-seen order, so the feature frame never
        contains the same column twice.
        """
        stripped = (piece.strip() for piece in text.split(","))
        return list(dict.fromkeys(name for name in stripped if name))

    def start_analysis(self):
        """Prompt for target and features, then preprocess and train.

        Any exception raised while preprocessing or training is reported in an
        error dialog and then re-raised, so the traceback is still available.
        """
        if self.dataset is None:
            messagebox.showerror("Error", "No dataset loaded!")
            return

        # "import tkinter" does not load its submodules, so tk.simpledialog
        # only resolves when some other import happened to pull it in.
        from tkinter import simpledialog

        # str() each label: column labels are not guaranteed to be strings.
        column_list = ', '.join(map(str, self.dataset.columns))

        # Select target variable
        answer = simpledialog.askstring("Input", f"Choose Target Variable (Dependent):\n{column_list}")
        if answer is not None and answer not in self.dataset.columns:
            # Tolerate stray whitespace, as is done for the independent variables.
            answer = answer.strip()
        self.target = answer
        if self.target not in self.dataset.columns:
            messagebox.showerror("Error", "Invalid target variable!")
            return

        # Select independent variables
        independent_vars = simpledialog.askstring("Input", f"Choose Independent Variables (comma-separated):\n{column_list}")
        if not independent_vars:
            messagebox.showerror("Error", "No independent variables selected!")
            return

        self.independent_vars = self._parse_variable_list(independent_vars)
        if not self.independent_vars:
            # The answer held only commas/whitespace.
            messagebox.showerror("Error", "No independent variables selected!")
            return
        for var in self.independent_vars:
            if var not in self.dataset.columns:
                messagebox.showerror("Error", f"Invalid independent variable: {var}")
                return

        if self.target in self.independent_vars:
            messagebox.showerror("Error", "Target variable cannot be an independent variable!")
            return

        # Preprocess and train models
        try:
            self.preprocess_data()
            self.train_models()
        except Exception as exc:
            # GUI boundary: tell the user, then let Tk report the traceback.
            messagebox.showerror("Error", f"Analysis failed: {exc}")
            raise

    def preprocess_data(self):
        """Impute, drop and label-encode ``self.dataset`` in place.

        Numeric gaps are filled with the column mean, rows that still contain
        missing values are dropped, and every object-dtype column is replaced
        by integer codes from ``LabelEncoder``. Encoded column names are added
        to ``self.encoded_columns``.
        """
        # Handle missing values
        self.dataset.fillna(self.dataset.mean(numeric_only=True), inplace=True)
        self.dataset.dropna(inplace=True)

        # Encode categorical variables
        categorical_cols = list(self.dataset.select_dtypes(include=['object']).columns)
        for col in categorical_cols:
            self.dataset[col] = LabelEncoder().fit_transform(self.dataset[col])
        self.encoded_columns.update(categorical_cols)

        print("Data Preprocessing Complete!")
        # DataFrame.info() prints its report itself and returns None, so it is
        # not wrapped in print().
        self.dataset.info()

    @staticmethod
    def _infer_problem_type(y, was_categorical=False):
        """Return ``"classification"`` or ``"regression"`` for target ``y``.

        Args:
            y: Target values; only ``y.nunique()`` is used.
            was_categorical: True when the target was a text column before
                label encoding. Such a target is a classification problem
                however many classes it has.
        """
        if was_categorical or y.nunique() <= 2:
            return "classification"
        return "regression"

    def train_models(self):
        """Train the candidate models and keep the best one in ``self.model``.

        Uses a 75/25 train/test split of the chosen independent variables.
        Classification candidates are compared by accuracy (higher is better),
        regression candidates by mean squared error (lower is better).
        Afterwards the result is announced, the training script is generated
        and the predictions are plotted.
        """
        # Split data
        X = self.dataset[self.independent_vars]
        y = self.dataset[self.target]
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

        # Determine problem type
        self.problem_type = self._infer_problem_type(y, self.target in self.encoded_columns)
        if self.problem_type == "classification":
            models = {
                "Logistic Regression": LogisticRegression(max_iter=1000),
                "Random Forest Classifier": RandomForestClassifier(),
            }
            metric = accuracy_score
            higher_is_better = True
        else:
            models = {
                "Linear Regression": LinearRegression(),
                "Random Forest Regressor": RandomForestRegressor(),
            }
            metric = mean_squared_error
            higher_is_better = False

        # Train and evaluate models. The first candidate is always accepted, so
        # best_model can never stay None (e.g. if a score comes out as NaN).
        best_model, best_score = None, None
        for name, model in models.items():
            model.fit(X_train, y_train)
            score = metric(y_test, model.predict(X_test))
            print(f"{name} Score: {score}")

            if best_score is None:
                improved = True
            elif higher_is_better:
                improved = score > best_score
            else:
                improved = score < best_score
            if improved:
                best_model, best_score = model, score

        self.model = best_model
        messagebox.showinfo("Best Model", f"Best Model: {type(best_model).__name__} with Score: {best_score}")
        self.generate_code(X_train, y_train)
        self.visualize_results(X_test, y_test)

    def visualize_results(self, X_test, y_test):
        """Plot the selected model's predictions against the held-out data.

        Regression: actual and predicted values as two scatter series.
        Classification: a heatmap of the actual-vs-predicted cross-tabulation.
        """
        y_pred = self.model.predict(X_test)

        if self.problem_type == "regression":
            plt.figure(figsize=(10, 6))
            plt.scatter(range(len(y_test)), y_test, color='blue', label="Actual")
            plt.scatter(range(len(y_pred)), y_pred, color='red', label="Predicted")
            plt.legend()
            plt.title("Actual vs Predicted")
            plt.show()
        else:
            sns.heatmap(pd.crosstab(y_test, y_pred), annot=True, cmap="Blues", fmt="d")
            plt.title("Confusion Matrix")
            plt.show()

    def _render_code(self):
        """Return the training script for the selected model as a string.

        Reads ``self.model``, ``self.target``, ``self.independent_vars`` and
        ``self.problem_type``.
        """
        model_cls = type(self.model)
        # Estimators live in private submodules (e.g. "sklearn.ensemble._forest")
        # but are imported from the public package, so drop the underscore
        # parts instead of hard-coding "sklearn.linear_model".
        public_module = ".".join(
            part for part in model_cls.__module__.split(".") if not part.startswith("_")
        )
        if self.problem_type == "classification":
            metric_call = "accuracy_score(y_test, y_pred)"
        else:
            metric_call = "mean_squared_error(y_test, y_pred)"

        lines = [
            "",
            "# Generated ML Code",
            "import pandas as pd",
            "from sklearn.model_selection import train_test_split",
            f"from {public_module} import {model_cls.__name__}",
            "from sklearn.metrics import accuracy_score, mean_squared_error",
            "",
            "# Load dataset",
            '# dataset = pd.read_csv("your_dataset.csv")',
            "# NOTE: handle missing values and encode text columns before training.",
            "",
            "# Preprocessing",
            f"X = dataset[{self.independent_vars!r}]",
            f"y = dataset[{self.target!r}]",
            "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)",
            "",
            "# Train model",
            # repr() of the estimator keeps its non-default settings
            # (e.g. max_iter=1000) in the generated constructor call.
            f"model = {self.model!r}",
            "model.fit(X_train, y_train)",
            "",
            "# Predict",
            "y_pred = model.predict(X_test)",
            "",
            "# Evaluate",
            f"score = {metric_call}",
            'print("Model Score:", score)',
        ]
        return "\n".join(lines) + "\n"

    def generate_code(self, X_train, y_train):
        """Render the training script and show it in the text area.

        Args:
            X_train: Unused; kept for interface compatibility.
            y_train: Unused; kept for interface compatibility.
        """
        # Generate ML training code
        self.generated_code = self._render_code()
        self.code_display.delete("1.0", tk.END)
        self.code_display.insert(tk.END, self.generated_code)

    def copy_code(self):
        """Copy the generated script to the clipboard, if there is one."""
        if self.generated_code:
            pyperclip.copy(self.generated_code)
            messagebox.showinfo("Copied", "Generated code copied to clipboard!")
        else:
            messagebox.showerror("Error", "No code generated yet!")

# Run the App
if __name__ == "__main__":
    # Create the Tk root window, let AutoMLApp attach its widgets to it,
    # then hand control to the Tk event loop. The app's Quit button is wired
    # to root.quit, which is what ends this loop.
    root = tk.Tk()
    app = AutoMLApp(root)
    root.mainloop()