In [10]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier, VotingClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier  # Added for neural network
from xgboost import XGBClassifier  # Added for XGBoost
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder, StandardScaler
import joblib

# Load data from CSV file
df = pd.read_csv('geo_points_Big.csv')

# Function to convert time to numerical features
def process_time(time_str):
    time_parts = time_str.split(' ')[0].split(':')
    hour, minute, second = map(int, time_parts)
    am_pm = 0 if 'AM' in time_str else 1  # 0 for AM, 1 for PM
    return hour, minute, second, am_pm

# Apply the function to the Time column
df[['Hour', 'Minute', 'Second', 'AM_PM']] = df['Time'].apply(lambda x: pd.Series(process_time(x)))

# Create cyclical features for hour, minute, and second
df['Hour_sin'] = np.sin(df['Hour'] * (2. * np.pi / 24))
df['Hour_cos'] = np.cos(df['Hour'] * (2. * np.pi / 24))
df['Minute_sin'] = np.sin(df['Minute'] * (2. * np.pi / 60))
df['Minute_cos'] = np.cos(df['Minute'] * (2. * np.pi / 60))

# Drop the original Time column and Count column (assuming it's not needed)
df = df.drop(columns=['Time', 'Count', 'Hour', 'Minute', 'Second'])

# Encode the Zone column to numerical values
label_encoder = LabelEncoder()
df['Zone'] = label_encoder.fit_transform(df['Zone'])

# Split data into training and testing sets
train, test = train_test_split(df, test_size=0.2, random_state=42)

# Separate features and target variable from training dataset
X_train = train.drop(columns='Zone')
y_train = train['Zone']

# Separate features and target variable from testing dataset
X_test = test.drop(columns='Zone')
y_test = test['Zone']

# Normalize the data
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Define different classifiers
clf1 = RandomForestClassifier(n_estimators=100, max_depth=10, random_state=42)
# clf2 = GradientBoostingClassifier(n_estimators=100, learning_rate=0.1, max_depth=3, random_state=42)
# clf3 = AdaBoostClassifier(n_estimators=100, learning_rate=1.0, random_state=42)
# clf4 = SVC(kernel='linear', C=1, probability=True, random_state=42)
clf5 = KNeighborsClassifier(n_neighbors=5)
clf6 = XGBClassifier(n_estimators=100, max_depth=3, random_state=42)  # Added XGBoost
# clf7 = MLPClassifier(hidden_layer_sizes=(100, 50), max_iter=1000, random_state=42)  # Added Neural Network

# Hyperparameter tuning for Random Forest
param_grid_rf = {
    'n_estimators': [100, 200],
    'max_depth': [10, 20],
    'min_samples_split': [2, 5],
    'min_samples_leaf': [1, 2],
}

grid_rf = GridSearchCV(clf1, param_grid=param_grid_rf, cv=5, n_jobs=-1)
grid_rf.fit(X_train_scaled, y_train)
best_rf = grid_rf.best_estimator_

# # Hyperparameter tuning for Gradient Boosting
# param_grid_gb = {
#     'n_estimators': [100, 200],
#     'learning_rate': [0.1, 0.01],
#     'max_depth': [3, 4],
# }

# grid_gb = GridSearchCV(clf2, param_grid=param_grid_gb, cv=5, n_jobs=-1)
# grid_gb.fit(X_train_scaled, y_train)
# best_gb = grid_gb.best_estimator_

# Hyperparameter tuning for AdaBoost
# param_grid_ada = {
#     'n_estimators': [100, 200],
#     'learning_rate': [1.0, 0.5],
# }

# grid_ada = GridSearchCV(clf3, param_grid=param_grid_ada, cv=5, n_jobs=-1)
# grid_ada.fit(X_train_scaled, y_train)
# best_ada = grid_ada.best_estimator_

# Hyperparameter tuning for SVM
# param_grid_svc = {
#     'C': [0.1, 1, 10],
#     'kernel': ['linear', 'rbf'],
# }

# grid_svc = GridSearchCV(clf4, param_grid=param_grid_svc, cv=5, n_jobs=-1)
# grid_svc.fit(X_train_scaled, y_train)
# best_svc = grid_svc.best_estimator_

# Hyperparameter tuning for KNN
param_grid_knn = {
    'n_neighbors': [3, 5, 7],
    'weights': ['uniform', 'distance'],
}

grid_knn = GridSearchCV(clf5, param_grid=param_grid_knn, cv=5, n_jobs=-1)
grid_knn.fit(X_train_scaled, y_train)
best_knn = grid_knn.best_estimator_

# Hyperparameter tuning for XGBoost
param_grid_xgb = {
    'n_estimators': [100, 200],
    'max_depth': [3, 4],
    'learning_rate': [0.1, 0.01],
}

grid_xgb = GridSearchCV(clf6, param_grid=param_grid_xgb, cv=5, n_jobs=-1)
grid_xgb.fit(X_train_scaled, y_train)
best_xgb = grid_xgb.best_estimator_

# Hyperparameter tuning for Neural Network
# param_grid_nn = {
#     'hidden_layer_sizes': [(100, 50), (200, 100)],
#     'max_iter': [1000, 2000],
#     'alpha': [0.0001, 0.001],
# }

# grid_nn = GridSearchCV(clf7, param_grid=param_grid_nn, cv=5, n_jobs=-1)
# grid_nn.fit(X_train_scaled, y_train)
# best_nn = grid_nn.best_estimator_

# Update the ensemble with the best models
eclf = VotingClassifier(estimators=[
    # ('rf', best_rf),
    # ('gb', best_gb),
    # ('ada', best_ada),
    # ('svc', best_svc),
    ('knn', best_knn),
    ('xgb', best_xgb)
    # ('nn', best_nn)
], voting='soft')

# Fit the ensemble model to the training data
eclf.fit(X_train_scaled, y_train)

# Make predictions on the testing data
predictions = eclf.predict(X_test_scaled)

# Calculate the accuracy of the model
accuracy = accuracy_score(y_test, predictions)
print(f'Accuracy: {accuracy * 100:.2f}%')

Accuracy: 70.93%


In [11]:
# import pandas as pd
# import numpy as np
# from sklearn.ensemble import VotingClassifier
# from sklearn.model_selection import train_test_split, GridSearchCV
# from sklearn.metrics import accuracy_score
# from sklearn.preprocessing import LabelEncoder, StandardScaler
# import joblib

# class GeoZonePredictor:
#     def __init__(self, model_file=None):
#         self.label_encoder = LabelEncoder()
#         self.scaler = StandardScaler()
#         if model_file:
#             self.model = joblib.load(model_file)
#         else:
#             self.model = RandomForestClassifier()

#     def preprocess(self, df):
#         def process_time(time_str):
#             time_parts = time_str.split(' ')[0].split(':')
#             hour, minute, second = map(int, time_parts)
#             am_pm = 0 if 'AM' in time_str else 1  # 0 for AM, 1 for PM
#             return hour, minute, second, am_pm

#         df[['Hour', 'Minute', 'Second', 'AM_PM']] = df['Time'].apply(lambda x: pd.Series(process_time(x)))
#         df['Hour_sin'] = np.sin(df['Hour'] * (2. * np.pi / 24))
#         df['Hour_cos'] = np.cos(df['Hour'] * (2. * np.pi / 24))
#         df['Minute_sin'] = np.sin(df['Minute'] * (2. * np.pi / 60))
#         df['Minute_cos'] = np.cos(df['Minute'] * (2. * np.pi / 60))
#         df = df.drop(columns=['Time', 'Count', 'Hour', 'Minute', 'Second'])
#         df['Zone'] = self.label_encoder.fit_transform(df['Zone'])
#         return df

#     def fit(self, df):
#         df = self.preprocess(df)
#         train, test = train_test_split(df, test_size=0.2, random_state=42)
#         X_train = train.drop(columns='Zone')
#         y_train = train['Zone']
#         X_train_scaled = self.scaler.fit_transform(X_train)
#         self.model = VotingClassifier(estimators=self.clf_estimators, voting='soft')
#         self.model.fit(X_train_scaled, y_train)

#     def predict(self, df):
#         df = self.preprocess(df)
#         X_test = df.drop(columns='Zone')
#         X_test_scaled = self.scaler.transform(X_test)
#         predictions = self.model.predict(X_test_scaled)
#         return predictions

#     def evaluate(self, df):
#         df = self.preprocess(df)
#         X_test = df.drop(columns='Zone')
#         y_test = df['Zone']
#         X_test_scaled = self.scaler.transform(X_test)
#         predictions = self.model.predict(X_test_scaled)
#         accuracy = accuracy_score(y_test, predictions)
#         print(f'Accuracy: {accuracy * 100:.2f}%')   

#     def save_model(self, filepath):
#         joblib.dump(self.model, filepath)

#     def load_model(self, filepath):
#         self.model = joblib.load(filepath)

# # Usage:
# # Assuming `best_knn` and `best_xgb` are your best models from your code above.
# clf_estimators = [('knn', best_knn), ('xgb', best_xgb)]
# predictor = GeoZonePredictor(clf_estimators)
# df = pd.read_csv('geo_points_Big.csv')
# predictor.fit(df)
# predictor.evaluate(df)
# predictor.save_model('model.pkl')

In [12]:
# import pandas as pd
# import numpy as np
# from sklearn.ensemble import VotingClassifier, RandomForestClassifier
# from sklearn.model_selection import train_test_split, GridSearchCV
# from sklearn.metrics import accuracy_score
# from sklearn.preprocessing import LabelEncoder, StandardScaler
# import joblib
# from sklearn.neighbors import KNeighborsClassifier
# from xgboost import XGBClassifier

# class GeoZonePredictor:
#     def __init__(self, clf_estimators=None, model_file=None):
#         self.label_encoder = LabelEncoder()
#         self.scaler = StandardScaler()
#         self.clf_estimators = clf_estimators
#         if model_file:
#             loaded_objects = joblib.load(model_file)
#             self.model = loaded_objects['model']
#             self.label_encoder = loaded_objects['label_encoder']
#             self.scaler = loaded_objects['scaler']
#         else:
#             self.model = VotingClassifier(estimators=self.clf_estimators, voting='soft') if self.clf_estimators else None

#     def preprocess(self, df):
#         def process_time(time_str):
#             time_parts = time_str.split(' ')[0].split(':')
#             hour, minute, second = map(int, time_parts)
#             am_pm = 0 if 'AM' in time_str else 1  # 0 for AM, 1 for PM
#             return hour, minute, second, am_pm

#         df[['Hour', 'Minute', 'Second', 'AM_PM']] = df['Time'].apply(lambda x: pd.Series(process_time(x)))
#         df['Hour_sin'] = np.sin(df['Hour'] * (2. * np.pi / 24))
#         df['Hour_cos'] = np.cos(df['Hour'] * (2. * np.pi / 24))
#         df['Minute_sin'] = np.sin(df['Minute'] * (2. * np.pi / 60))
#         df['Minute_cos'] = np.cos(df['Minute'] * (2. * np.pi / 60))
#         df = df.drop(columns=['Time', 'Count', 'Hour', 'Minute', 'Second'])
#         df['Zone'] = self.label_encoder.fit_transform(df['Zone'])
#         return df

#     def fit(self, df):
#         if not self.clf_estimators:
#             raise ValueError("Classifier estimators must be provided for training.")
#         df = self.preprocess(df)
#         train, test = train_test_split(df, test_size=0.2, random_state=42)
#         X_train = train.drop(columns='Zone')
#         y_train = train['Zone']
#         X_train_scaled = self.scaler.fit_transform(X_train)
#         self.model.fit(X_train_scaled, y_train)

#     def predict(self, df):
#         df = self.preprocess(df)
#         X_test = df.drop(columns='Zone')
#         X_test_scaled = self.scaler.transform(X_test)
#         predictions = self.model.predict(X_test_scaled)
#         return predictions

#     def evaluate(self, df):
#         df = self.preprocess(df)
#         X_test = df.drop(columns='Zone')
#         y_test = df['Zone']
#         X_test_scaled = self.scaler.transform(X_test)
#         predictions = self.model.predict(X_test_scaled)
#         accuracy = accuracy_score(y_test, predictions)
#         print(f'Accuracy: {accuracy * 100:.2f}%')   

#     def save_model(self, filepath):
#         joblib.dump({'model': self.model, 'label_encoder': self.label_encoder, 'scaler': self.scaler}, filepath)

#     def load_model(self, filepath):
#         loaded_objects = joblib.load(filepath)
#         self.model = loaded_objects['model']
#         self.label_encoder = loaded_objects['label_encoder']
#         self.scaler = loaded_objects['scaler']

# # Usage:
# # Define the classifiers and hyperparameters for grid search
# knn_params = {'n_neighbors': [3, 5, 7]}
# xgb_params = {'max_depth': [3, 5, 7], 'learning_rate': [0.01, 0.1, 0.2]}
# knn = GridSearchCV(KNeighborsClassifier(), knn_params, cv=5)
# xgb = GridSearchCV(XGBClassifier(), xgb_params, cv=5)

# clf_estimators = [('knn', knn), ('xgb', xgb)]
# predictor = GeoZonePredictor(clf_estimators)
# df = pd.read_csv('geo_points_Big.csv')
# predictor.fit(df)
# predictor.evaluate(df)
# predictor.save_model('model.pkl')


In [14]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.feature_selection import SelectKBest, chi2
import joblib
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler
from sklearn.feature_selection import f_classif

class GeoZonePredictor:
    def __init__(self, model_file=None):
        self.label_encoder = LabelEncoder()
        if model_file:
            loaded_objects = joblib.load(model_file)
            self.model = loaded_objects['model']
            self.label_encoder = loaded_objects['label_encoder']
        else:
            self.model = None  # Model will be defined in the fit method

    def preprocess(self, df):
        def process_time(time_str):
            time_parts = time_str.split(' ')[0].split(':')
            hour, minute, second = map(int, time_parts)
            am_pm = 0 if 'AM' in time_str else 1  # 0 for AM, 1 for PM
            return hour, minute, second, am_pm

        df[['Hour', 'Minute', 'Second', 'AM_PM']] = df['Time'].apply(lambda x: pd.Series(process_time(x)))
        df['Hour_sin'] = np.sin(df['Hour'] * (2. * np.pi / 24))
        df['Hour_cos'] = np.cos(df['Hour'] * (2. * np.pi / 24))
        df['Minute_sin'] = np.sin(df['Minute'] * (2. * np.pi / 60))
        df['Minute_cos'] = np.cos(df['Minute'] * (2. * np.pi / 60))
        df = df.drop(columns=['Time', 'Count', 'Hour', 'Minute', 'Second'])
        df['Zone'] = self.label_encoder.fit_transform(df['Zone'])
        return df

    def fit(self, df):
        df = self.preprocess(df)
        train, test = train_test_split(df, test_size=0.2, random_state=42)
        X_train = train.drop(columns='Zone')
        y_train = train['Zone']

        # Create a pipeline with feature scaling, feature selection, and classification
        pipeline = Pipeline([
            ('scaler', MinMaxScaler()),  # Use MinMaxScaler instead of StandardScaler
            ('feature_selection', SelectKBest(score_func=chi2, k='all')),  # Or use score_func=f_classif
            ('classifier', RandomForestClassifier())
        ])

        # Define the grid of hyperparameters to search
        param_grid = {
            'classifier__n_estimators': [50, 100, 200],
            'classifier__max_depth': [None, 10, 20, 30],
            'classifier__min_samples_split': [2, 5, 10]
        }

        # Use GridSearchCV to find the best hyperparameters
        grid_search = GridSearchCV(pipeline, param_grid, cv=5, n_jobs=-1)
        grid_search.fit(X_train, y_train)
        self.model = grid_search.best_estimator_

    def predict(self, df):
        df = self.preprocess(df)
        X_test = df.drop(columns='Zone')
        predictions = self.model.predict(X_test)
        return predictions

    def evaluate(self, df):
        df = self.preprocess(df)
        X_test = df.drop(columns='Zone')
        y_test = df['Zone']
        predictions = self.model.predict(X_test)
        accuracy = accuracy_score(y_test, predictions)
        print(f'Accuracy: {accuracy * 100:.2f}%')   

    def save_model(self, filepath):
        joblib.dump({'model': self.model, 'label_encoder': self.label_encoder}, filepath)

    def load_model(self, filepath):
        loaded_objects = joblib.load(filepath)
        self.model = loaded_objects['model']
        self.label_encoder = loaded_objects['label_encoder']

# Usage:
predictor = GeoZonePredictor()
df = pd.read_csv('geo_points_Big.csv')  # Assuming your data is in this file
predictor.fit(df)
predictor.evaluate(df)
predictor.save_model('model.pkl')


Accuracy: 73.16%


In [28]:
# import pandas as pd
# import numpy as np
# import tensorflow as tf
# from sklearn.model_selection import train_test_split
# from sklearn.preprocessing import LabelEncoder, StandardScaler

# # Load your CSV data with numerical zone values
# df = pd.read_csv('geo_points_Big.csv')  # Replace 'your_data.csv' with your actual file name

# # Extract features (latitude and longitude) and labels (zone)
# X = df[['Latitude', 'Longitude']].values
# y = df['Zone'].values

# # Encode the zone labels (0, 1, 2)
# label_encoder = LabelEncoder()
# y = label_encoder.fit_transform(y)

# # Split the data into training and testing sets
# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# # Standardize the input features
# scaler = StandardScaler()
# X_train_scaled = scaler.fit_transform(X_train)
# X_test_scaled = scaler.transform(X_test)

# # Define a TensorFlow neural network model
# model = tf.keras.Sequential([
#     tf.keras.layers.Input(shape=(2,)),  # Input layer for latitude and longitude
#     tf.keras.layers.Dense(64, activation='relu'),  # Hidden layer
#     tf.keras.layers.Dense(32, activation='relu'),  # Hidden layer
#     tf.keras.layers.Dense(3, activation='softmax')  # Output layer with 3 classes (zones)
# ])

# # Compile the model
# model.compile(optimizer='adam',
#               loss='sparse_categorical_crossentropy',
#               metrics=['accuracy'])

# # Train the model
# model.fit(X_train_scaled, y_train, epochs=10, batch_size=32, validation_split=0.2)

# # Evaluate the model on the test data
# test_loss, test_accuracy = model.evaluate(X_test_scaled, y_test)
# print(f'Test Accuracy: {test_accuracy * 100:.2f}%')

# # Save the trained model
# model.save('zone_prediction_model')
import numpy as np

# Assuming X_train contains your training data with latitude and longitude
mean_latitude = np.mean(X_train[:, 0])
mean_longitude = np.mean(X_train[:, 1])
stddev_latitude = np.std(X_train[:, 0])
stddev_longitude = np.std(X_train[:, 1])

# Use these values for standardization during training and save them
# Load the trained model
model = tf.keras.models.load_model('zone_prediction_model')

# Standardize the input features using TensorFlow
sample_latitude = 10.7128  # Replace with your sample latitude
sample_longitude = 4.0060  # Replace with your sample longitude

sample_data_scaled = tf.constant(
    [[(sample_latitude - mean_latitude) / stddev_latitude, (sample_longitude - mean_longitude) / stddev_longitude]],
    dtype=tf.float32
)

# Predict the zone for the sample data point
predictions = model.predict(sample_data_scaled)

# Decode the predicted label back to the original zone label using a list of class labels
class_labels = ['black', 'orange', 'red']  # Define the class labels in the same order as your model's output
predicted_zone = class_labels[np.argmax(predictions)]

print(f"Predicted Zone: {predicted_zone}")
from geopy.geocoders import Nominatim

def get_user_location():
    geolocator = Nominatim(user_agent="geoapiExercises")
    try:
        user_location = geolocator.geocode("Your Address Here")  # Replace with user's address or leave it empty
        if user_location:
            latitude = user_location.latitude
            longitude = user_location.longitude
            return latitude, longitude
    except Exception as e:
        print("Error: Unable to get user's location.")
    return None, None

# Get the user's current location
user_latitude, user_longitude = get_user_location()
print(user_latitude)

Predicted Zone: red
None
