In [1]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
import pandas as pd
import requests
import json

In [2]:
torch.cuda.empty_cache()

if torch.cuda.is_available():
    print(f"Number of GPUs available: {torch.cuda.device_count()}")
    for i in range(torch.cuda.device_count()):
        print(f"\nGPU {i}: {torch.cuda.get_device_name(i)}")
        print(f"  Memory Allocated: {torch.cuda.memory_allocated(i) / 1024**2:.2f} MB")
        print(f"  Memory Cached: {torch.cuda.memory_reserved(i) / 1024**2:.2f} MB")
else:
    print("No GPU available. Using CPU.")

Number of GPUs available: 1

GPU 0: NVIDIA GeForce RTX 3050 Ti Laptop GPU
  Memory Allocated: 0.00 MB
  Memory Cached: 0.00 MB


In [3]:
# L'URL de l'API GraphQL
url = "http://localhost:4000/graphql"  # Remplace par ton URL GraphQL

# Requête GraphQL
query = """
  query {
  crimes {
    cmplnt_num
    addr_pct_cd
    boro_nm
    cmplnt_fr_dt
    cmplnt_fr_tm
    cmplnt_to_dt
    cmplnt_to_tm
    crm_atpt_cptd_cd
    jurisdiction_code
    juris_desc
    ofns_desc
    pd_desc
    latitude
    longitude
  }
}
"""

# Corps de la requête
payload = {
    'query': query
}

# En-têtes
headers = {
    'Content-Type': 'application/json',
}

# Effectuer la requête POST
response = requests.post(url, json=payload, headers=headers)
data=[]
# Vérifier la réponse
if response.status_code == 200:
    data_json = response.json()
    data=data_json['data']['crimes']
    print(len(data))
else:
    print(f"Erreur: {response.status_code}")

83691


In [4]:
import torch
import torch.nn as nn

try:
    device = torch.device("cuda")
    test_model = nn.Linear(10, 1).to(device)
    print("✅ Model loaded to GPU successfully.")
except Exception as e:
    print("❌ Failed to load model on GPU:", e)
print(torch.cuda.is_available())


✅ Model loaded to GPU successfully.
True


In [9]:
df = pd.DataFrame(data)

# Remove rows with clearly invalid dates (e.g., before 2000 or after 2100)
def is_valid_date(row):
    try:
        dt = pd.to_datetime(row['cmplnt_fr_dt'] + ' ' + row['cmplnt_fr_tm'], errors='raise')
        return 2000 <= dt.year <= 2100
    except:
        return False

df = df[df.apply(is_valid_date, axis=1)].copy()

# Now safely parse the datetime
df['datetime'] = pd.to_datetime(df['cmplnt_fr_dt'] + ' ' + df['cmplnt_fr_tm'])

# Extract time-based features
df['hour'] = df['datetime'].dt.hour
df['day_of_week'] = df['datetime'].dt.weekday
df['day_of_month'] = df['datetime'].dt.day
df['month'] = df['datetime'].dt.month

# Normalize time features
scaler = MinMaxScaler()
time_features = ['hour', 'day_of_week', 'day_of_month', 'month']
df[time_features] = scaler.fit_transform(df[time_features])

# Create tensors
X = torch.tensor(df[time_features].values, dtype=torch.float32)
y = torch.tensor([[1.0]] * len(df), dtype=torch.float32)  # Placeholder target

# Dataset and DataLoader
dataset = TensorDataset(X, y)
loader = DataLoader(dataset, batch_size=64, shuffle=True)

# Model
class CrimePredictor(nn.Module):
    def __init__(self, input_dim):
        super(CrimePredictor, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(input_dim, 32),
            nn.ReLU(),
            nn.Linear(32, 16),
            nn.ReLU(),
            nn.Linear(16, 1)
        )

    def forward(self, x):
        return self.model(x)

# Training setup
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = CrimePredictor(X.shape[1]).to(device)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Training loop
num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for xb, yb in loader:
        xb, yb = xb.to(device), yb.to(device)

        optimizer.zero_grad()
        preds = model(xb)
        loss = criterion(preds, yb)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss / len(loader)}")

  df['datetime'] = pd.to_datetime(df['cmplnt_fr_dt'] + ' ' + df['cmplnt_fr_tm'])


Epoch [1/10], Loss: 0.02759128200039035
Epoch [2/10], Loss: 1.9075017869920187e-05
Epoch [3/10], Loss: 5.396046444657009e-06
Epoch [4/10], Loss: 2.1319056397562057e-06
Epoch [5/10], Loss: 1.116885322700933e-06
Epoch [6/10], Loss: 5.921287712546922e-07
Epoch [7/10], Loss: 3.3376372216467993e-07
Epoch [8/10], Loss: 4.205205944835504e-07
Epoch [9/10], Loss: 4.978107331499137e-07
Epoch [10/10], Loss: 4.911197669232055e-07


In [None]:
import joblib
# Save the scaler
joblib.dump(scaler, "./model/scaler.pkl")

torch.save(model, "./model/crime_predictor_full.pth")


In [11]:
model = torch.load("./model/crime_predictor_full.pth", map_location=device)
model.eval()


  model = torch.load("./model/crime_predictor_full.pth", map_location=device)


CrimePredictor(
  (model): Sequential(
    (0): Linear(in_features=4, out_features=32, bias=True)
    (1): ReLU()
    (2): Linear(in_features=32, out_features=16, bias=True)
    (3): ReLU()
    (4): Linear(in_features=16, out_features=1, bias=True)
  )
)