In [0]:
pip install tensorflow

In [0]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

In [0]:
# ------------------------------------------------------------
# 1. COLETA E CARREGAMENTO DOS DADOS
# ------------------------------------------------------------
# Digamos que temos um CSV com as colunas:
# 'concurso', 'data', 'num1', 'num2', 'num3', 'num4', 'num5', 'num6'
# File location and type
file_location = "/FileStore/novo_diretorio/mega_sena_07_12_2024.csv"
file_type = "csv"

# CSV options
infer_schema = "false"
first_row_is_header = "true"
delimiter = ","

# The applied options are for CSV files. For other file types, these will be ignored.
df = spark.read.format(file_type) \
  .option("inferSchema", infer_schema) \
  .option("header", first_row_is_header) \
  .option("sep", delimiter) \
  .load(file_location).toPandas()

In [0]:
df

Unnamed: 0,Concurso,Data,Dezena 1,Dezena 2,Dezena 3,Dezena 4,Dezena 5,Dezena 6
0,1,11/03/1996,4,5,30,33,41,52
1,2,18/03/1996,9,37,39,41,43,49
2,3,25/03/1996,10,11,29,30,36,47
3,4,01/04/1996,1,5,6,27,42,59
4,5,08/04/1996,1,2,6,16,19,46
...,...,...,...,...,...,...,...,...
2800,2801,28/11/2024,25,27,33,46,48,56
2801,2802,30/11/2024,17,21,26,28,32,60
2802,2803,03/12/2024,1,20,32,43,57,59
2803,2804,05/12/2024,14,24,25,31,33,60


In [0]:
df['Data'] = df['Data'].apply(lambda a: '-'.join(reversed(a.split('/'))) if '/' in a else a)

df['Data'] = pd.to_datetime(df['Data'], format='%Y-%m-%d')

In [0]:
df.columns = ['Concurso', 'data', 'num1', 'num2', 'num3', 'num4', 'num5', 'num6']

In [0]:
df

Unnamed: 0,Concurso,data,num1,num2,num3,num4,num5,num6
0,1,1996-03-11,4,5,30,33,41,52
1,2,1996-03-18,9,37,39,41,43,49
2,3,1996-03-25,10,11,29,30,36,47
3,4,1996-04-01,1,5,6,27,42,59
4,5,1996-04-08,1,2,6,16,19,46
...,...,...,...,...,...,...,...,...
2800,2801,2024-11-28,25,27,33,46,48,56
2801,2802,2024-11-30,17,21,26,28,32,60
2802,2803,2024-12-03,1,20,32,43,57,59
2803,2804,2024-12-05,14,24,25,31,33,60


In [0]:
inteiros = ['num1', 'num2', 'num3', 'num4', 'num5', 'num6']
for i in inteiros:
    df[i] = df[i].astype(int)


In [0]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2805 entries, 0 to 2804
Data columns (total 8 columns):
 #   Column    Non-Null Count  Dtype         
---  ------    --------------  -----         
 0   Concurso  2805 non-null   object        
 1   data      2805 non-null   datetime64[ns]
 2   num1      2805 non-null   int64         
 3   num2      2805 non-null   int64         
 4   num3      2805 non-null   int64         
 5   num4      2805 non-null   int64         
 6   num5      2805 non-null   int64         
 7   num6      2805 non-null   int64         
dtypes: datetime64[ns](1), int64(6), object(1)
memory usage: 175.4+ KB


In [0]:
# ------------------------------------------------------------
# 2. PRÉ-PROCESSAMENTO
# ------------------------------------------------------------
# Ordenar pelo concurso (caso não esteja ordenado)
df = df.sort_values(by='Concurso', ascending=True)

# Selecionar apenas as colunas de interesse (os 6 números)
numeros = df[['num1', 'num2', 'num3', 'num4', 'num5', 'num6']].values

# Normalizar de 1..60 para 0..1 (min-max scaling)
numeros_scaled = (numeros - 1) / (60 - 1)  # ou 59

In [0]:
# ------------------------------------------------------------
# 3. CRIAÇÃO DA JANELA DE TREINAMENTO
# ------------------------------------------------------------
def create_dataset(data, window_size=10):
    X, y = [], []
    for i in range(len(data) - window_size):
        X.append(data[i:i+window_size])      # sequencia de 10 sorteios passados
        y.append(data[i+window_size])        # sorteio alvo (o próximo)
    return np.array(X), np.array(y)

window_size = 10
X, y = create_dataset(numeros_scaled, window_size)

In [0]:
# ------------------------------------------------------------
# 4. SEPARAMOS TREINO E TESTE
# ------------------------------------------------------------
train_size = int(len(X) * 0.8)
X_train, X_test = X[:train_size], X[train_size:]
y_train, y_test = y[:train_size], y[train_size:]


In [0]:
# ------------------------------------------------------------
# 5. CONSTRUÇÃO DO MODELO (LSTM SIMPLES)
# ------------------------------------------------------------
model = keras.Sequential()
model.add(layers.LSTM(64, input_shape=(window_size, 6), return_sequences=True))
model.add(layers.LSTM(32))
model.add(layers.Dense(6, activation='linear'))  # 6 números como saída

model.compile(
    loss='mean_squared_error',
    optimizer='adam',
    metrics=['mae']
)

2024-12-26 19:46:55.240942: E external/local_xla/xla/stream_executor/cuda/cuda_driver.cc:152] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)
  super().__init__(**kwargs)


In [0]:
# ------------------------------------------------------------
# 6. TREINAMENTO
# ------------------------------------------------------------
history = model.fit(
    X_train, y_train,
    validation_data=(X_test, y_test),
    epochs=25,
    batch_size=16,
    verbose=1
)


Epoch 1/25
[1m  1/140[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m3s[0m 27ms/step - loss: 0.0339 - mae: 0.1467[1m  8/140[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 7ms/step - loss: 0.0234 - mae: 0.1184 [1m 15/140[0m [32m━━[0m[37m━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 8ms/step - loss: 0.0226 - mae: 0.1166[1m 23/140[0m [32m━━━[0m[37m━━━━━━━━━━━━━━━━━[0m [1m0s[0m 7ms/step - loss: 0.0223 - mae: 0.1166[1m 31/140[0m [32m━━━━[0m[37m━━━━━━━━━━━━━━━━[0m [1m0s[0m 7ms/step - loss: 0.0223 - mae: 0.1172[1m 39/140[0m [32m━━━━━[0m[37m━━━━━━━━━━━━━━━[0m [1m0s[0m 7ms/step - loss: 0.0224 - mae: 0.1174

In [0]:
# ------------------------------------------------------------
# 7. PREDIÇÃO
# ------------------------------------------------------------
# Vamos pegar a última janela do conjunto completo para simular "previsão do próximo"
last_window = numeros_scaled[-window_size:]  # os últimos 10 sorteios conhecidos
last_window = np.expand_dims(last_window, axis=0)  # reshape (1, window_size, 6)

predicted_scaled = model.predict(last_window)
predicted = predicted_scaled[0] * 59 + 1  # revertendo a normalização

# Arredondar para próximo inteiro dentro de [1..60]
predicted_rounded = np.round(predicted).astype(int)
predicted_rounded = np.clip(predicted_rounded, 1, 60)

print("Possível conjunto previsto (didático!):", predicted_rounded)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step
Possível conjunto previsto (didático!): [ 9 18 27 34 44 52]
