In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input mount and print the full path of every file found.
input_tree = os.walk('/kaggle/input')
for dirname, _, filenames in input_tree:
    for filename in filenames:
        path = os.path.join(dirname, filename)
        print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import gym
from gym import spaces
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf
from tensorflow.keras import models, layers, optimizers


In [None]:
# Read the UAV navigation CSV from the attached Kaggle dataset
data = pd.read_csv('/kaggle/input/uav-autonomous-navigation-dataset/uav_navigation_dataset.csv')

# Preview the first five rows
print(data.head(5))

# Per-column count of missing values
print(data.isna().sum())


In [None]:
# Columns fed to the agent as its state, and the label column used for the obstacle penalty
features = [
    'latitude', 'longitude', 'altitude',
    'lidar_distance', 'wind_speed', 'battery_level',
]
target = 'obstacle_detected'

# Rescale every feature column with min-max scaling; the scaled values
# overwrite the raw ones inside `data`
scaler = MinMaxScaler()
scaler.fit(data[features])
data[features] = scaler.transform(data[features])

# Plain numpy arrays for the environment
X = data[features].values
y = data[target].values


In [None]:
class DroneEnv(gym.Env):
    """Dataset-replay environment that serves rows of a feature matrix as states.

    Every observation is one row of ``X``. ``step`` moves the internal row
    index forward by a random 1-9 rows (wrapping around the dataset) and
    scores the new row by its Euclidean distance to ``goal``.

    NOTE(review): ``step`` never reads ``action``; transitions and rewards are
    independent of the agent's choice, so a policy cannot influence the
    outcome. Confirm whether that is intended.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Feature rows. The observation space is declared as ``[0, 1]``, so
        callers are expected to pass min-max-scaled data.
    y : array-like of shape (n_samples,)
        Obstacle flag per row; a value equal to 1 costs an extra -1 reward.
    goal : array-like of shape (n_features,), optional
        Target state. When omitted, the original six-feature goal
        ``DEFAULT_GOAL`` is used.

    Raises
    ------
    ValueError
        If ``X`` is not a non-empty 2-D array, if ``X`` and ``y`` differ in
        length, or if ``goal`` does not have one entry per feature.
    """

    # Goal used when the caller does not supply one (six-feature layout).
    DEFAULT_GOAL = (0.9, 0.9, 0.5, 0.5, 0.5, 1.0)

    def __init__(self, X, y, goal=None):
        super().__init__()

        # Store observations in the dtype the observation space advertises,
        # so returned states actually belong to `observation_space`.
        X = np.asarray(X, dtype=np.float32)
        y = np.asarray(y)
        if X.ndim != 2 or X.shape[0] == 0:
            raise ValueError(
                f"X must be a non-empty 2-D array of shape (n_samples, n_features); got shape {X.shape}"
            )
        if len(y) != len(X):
            raise ValueError(
                f"X and y must have the same number of rows; got {len(X)} and {len(y)}"
            )

        self.X = X
        self.y = y
        self.n_samples = len(X)
        n_features = X.shape[1]

        # State: one dataset row (size derived from X instead of hard-coded)
        self.observation_space = spaces.Box(low=0, high=1, shape=(n_features,), dtype=np.float32)
        # Define action space (6 directions + stay)
        self.action_space = spaces.Discrete(7)

        self.current_step = 0

        goal = self.DEFAULT_GOAL if goal is None else goal
        self.goal = np.asarray(goal, dtype=np.float64)  # hypothetical goal
        if self.goal.shape != (n_features,):
            raise ValueError(
                f"goal must have shape ({n_features},) to match X; got {self.goal.shape}"
            )

    def reset(self):
        """Jump to a uniformly random row and return it as the initial state."""
        self.current_step = np.random.randint(0, self.n_samples)
        return self.X[self.current_step]

    def step(self, action):
        """Advance to a later row and score it.

        Parameters
        ----------
        action : int
            Accepted for API compatibility; currently unused.

        Returns
        -------
        tuple
            ``(state, reward, done, info)`` where ``info`` is an empty dict.
        """
        # Move randomly (simulation): skip 1-9 rows, wrapping at the end
        self.current_step = (self.current_step + np.random.randint(1, 10)) % self.n_samples
        state = self.X[self.current_step]
        obstacle = self.y[self.current_step]

        # Reward logic: closer to the goal is better
        distance_to_goal = np.linalg.norm(state - self.goal)
        reward = -distance_to_goal
        if obstacle == 1:
            reward -= 1
        if distance_to_goal < 0.1:
            reward += 10  # goal reached

        # NOTE(review): because the index wraps via modulo, the second
        # condition is true only when the index lands exactly on the last row.
        done = bool(distance_to_goal < 0.05 or self.current_step > self.n_samples - 2)
        return state, reward, done, {}

    def render(self, mode='human'):
        """No-op; this environment has no visualisation."""
        pass


In [None]:
def build_model(state_size, action_size):
    """Create and compile the fully connected Q-value network.

    Args:
        state_size: Number of input features per state.
        action_size: Number of discrete actions; one linear output per action.

    Returns:
        A compiled ``Sequential`` model (two 64-unit ReLU hidden layers,
        Adam with learning rate 0.001, mean-squared-error loss).
    """
    stack = [layers.Input(shape=(state_size,))]
    stack += [layers.Dense(64, activation='relu') for _ in range(2)]
    stack.append(layers.Dense(action_size, activation='linear'))

    network = models.Sequential(stack)
    adam = optimizers.Adam(learning_rate=0.001)
    network.compile(optimizer=adam, loss='mse')
    return network


In [None]:
# Environment backed by the scaled dataset
env = DroneEnv(X, y)

# Network dimensions are read straight from the environment's spaces
state_size, action_size = env.observation_space.shape[0], env.action_space.n

# Q-network with one output per discrete action
model = build_model(state_size, action_size)


In [None]:
# --- Training hyperparameters ---------------------------------------------
# No earlier cell defines these, so a fresh "Restart & Run All" would stop
# with a NameError. They are declared here to make the cell self-contained;
# the values are conventional starting points and are meant to be tuned.
episodes = 50          # number of training episodes
max_steps = 500        # hard cap on steps per episode
gamma = 0.95           # discount factor for future rewards
epsilon = 1.0          # initial exploration probability
epsilon_min = 0.01     # floor for the exploration probability
epsilon_decay = 0.95   # multiplicative decay applied after every episode

# Total reward of each episode, kept so training progress can be inspected later
episode_rewards = []

for e in range(episodes):
    state = env.reset()
    total_reward = 0
    done = False
    step = 0

    while not done and step < max_steps:
        state_batch = state.reshape(1, -1)

        # Always evaluate the current state: these Q-values are the base of the
        # regression target below, so they are needed on exploratory steps too.
        # (Previously they were only computed on greedy steps, which left
        # `q_values` undefined or stale from an older state.)
        q_values = model.predict(state_batch, verbose=0)

        # select action (epsilon-greedy)
        if np.random.rand() < epsilon:
            action = np.random.randint(action_size)
        else:
            action = np.argmax(q_values[0])

        # take action
        next_state, reward, done, _ = env.step(action)
        total_reward += reward

        # compute the one-step TD target; named `td_target` so it does not
        # overwrite the `target` column name defined in the preprocessing cell
        td_target = reward
        if not done:
            next_q = model.predict(next_state.reshape(1, -1), verbose=0)[0]
            td_target += gamma * np.max(next_q)

        # only the taken action's Q-value is moved towards the target
        q_values[0][action] = td_target

        # single gradient step on this transition
        model.train_on_batch(state_batch, q_values)

        # move forward
        state = next_state
        step += 1

    episode_rewards.append(total_reward)
    epsilon = max(epsilon_min, epsilon * epsilon_decay)
    print(f"Episode {e+1}/{episodes} - Reward: {total_reward:.2f} - Epsilon: {epsilon:.2f}")


In [None]:
# Placeholder figure: the curve below is uniform random noise in [-10, 10],
# NOT the rewards obtained during training. Store each episode's total reward
# during training and plot that series here to see real progress.
placeholder_rewards = [np.random.uniform(-10,10) for _ in range(episodes)]

fig, ax = plt.subplots()
ax.plot(range(episodes), placeholder_rewards)
ax.set_title('Drone Path Optimization Training')
ax.set_xlabel('Episode')
ax.set_ylabel('Total Reward')
plt.show()
