In [1]:
! pip install kafka-python-ng



In [2]:
! pip install pandas



In [27]:
from kafka import KafkaProducer
import pandas as pd
import json
import time


producer = KafkaProducer(
    bootstrap_servers=['localhost:9092'],
    value_serializer=lambda v: json.dumps(v).encode('utf-8')
)


def process_csv(file_path):
    
    try:
        df = pd.read_csv(file_path)
        
        
        required_columns = ['Country', 'Year', 'Coal', 'Total']
        if not all(col in df.columns for col in required_columns):
            raise ValueError(f"CSV file must contain the following columns: {required_columns}")
        
        
        df = df[df['Coal'] > 0]
        
        
        for _, row in df.iterrows():
            yield {
                "country": row['Country'],
                "year": int(row['Year']),
                "carbon_emission": float(row['Coal']),  
                "activity": row['Total']
            }
    except Exception as e:
        print(f"Error processing CSV: {e}")
        return []


try:
    file_path = "/Users/kdn_aikothalavanya/Downloads/GCB2022v27_MtCO2_flat.csv"
    for data in process_csv(file_path):
        future = producer.send('carbonfootprint', data)
        print(f"Sent: {data}")
        time.sleep(0.5)  
except Exception as e:
    print(f"Error sending data: {e}")
finally:
    producer.close()


Sent: {'country': 'Afghanistan', 'year': 1949, 'carbon_emission': 0.014656, 'activity': 0.014656}
Sent: {'country': 'Afghanistan', 'year': 1950, 'carbon_emission': 0.021068, 'activity': 0.084272}
Sent: {'country': 'Afghanistan', 'year': 1951, 'carbon_emission': 0.025648, 'activity': 0.0916}
Sent: {'country': 'Afghanistan', 'year': 1952, 'carbon_emission': 0.031708, 'activity': 0.0916}
Sent: {'country': 'Afghanistan', 'year': 1953, 'carbon_emission': 0.037949, 'activity': 0.106256}
Sent: {'country': 'Afghanistan', 'year': 1954, 'carbon_emission': 0.042502, 'activity': 0.106256}
Sent: {'country': 'Afghanistan', 'year': 1955, 'carbon_emission': 0.062288, 'activity': 0.153888}
Sent: {'country': 'Afghanistan', 'year': 1956, 'carbon_emission': 0.062288, 'activity': 0.1832}
Sent: {'country': 'Afghanistan', 'year': 1957, 'carbon_emission': 0.076944, 'activity': 0.29312}
Sent: {'country': 'Afghanistan', 'year': 1958, 'carbon_emission': 0.0916, 'activity': 0.32976}
Sent: {'country': 'Afghanistan

KeyboardInterrupt: 

In [2]:
! pip install pycountry




In [3]:
from kafka import KafkaProducer
import pandas as pd
import json
import time
import pycountry

# Initialize Kafka Producer
producer = KafkaProducer(
    bootstrap_servers=['localhost:9092'],
    value_serializer=lambda v: json.dumps(v).encode('utf-8')
)

# Function to map country names to ISO 3166-1 alpha-3 codes
def get_iso_alpha3(country_name):
    try:
        return pycountry.countries.lookup(country_name).alpha_3
    except LookupError:
        return None

# Function to process CSV and prepare data
def process_csv(file_path):
    try:
        # Read CSV
        df = pd.read_csv(file_path)
        
        # Ensure required columns are present
        required_columns = ['Country', 'Year', 'Coal', 'Total']
        if not all(col in df.columns for col in required_columns):
            raise ValueError(f"CSV file must contain the following columns: {required_columns}")
        
        # Filter rows where 'Coal' > 0
        df = df[df['Coal'] > 0]
        
        # Add ISO 3166-1 alpha-3 codes
        df['ISO_alpha3'] = df['Country'].apply(get_iso_alpha3)
        
        # Drop rows with missing ISO alpha-3 codes
        df = df.dropna(subset=['ISO_alpha3'])
        
        # Yield each row as a dictionary
        for _, row in df.iterrows():
            yield {
                "country": row['Country'],
                "iso_alpha3": row['ISO_alpha3'],
                "year": int(row['Year']),
                "carbon_emission": float(row['Coal']),
                "activity": row['Total']
            }
    except Exception as e:
        print(f"Error processing CSV: {e}")
        return []

# Main block to read CSV and send data to Kafka
try:
    file_path = "/Users/kdn_aikothalavanya/Downloads/GCB2022v27_MtCO2_flat.csv"
    for data in process_csv(file_path):
        future = producer.send('carbonfootprint', data)
        print(f"Sent: {data}")
        time.sleep(0.5)  # Throttle to avoid overwhelming Kafka
except Exception as e:
    print(f"Error sending data: {e}")
finally:
    producer.close()


Sent: {'country': 'Afghanistan', 'iso_alpha3': 'AFG', 'year': 1949, 'carbon_emission': 0.014656, 'activity': 0.014656}
Sent: {'country': 'Afghanistan', 'iso_alpha3': 'AFG', 'year': 1950, 'carbon_emission': 0.021068, 'activity': 0.084272}
Sent: {'country': 'Afghanistan', 'iso_alpha3': 'AFG', 'year': 1951, 'carbon_emission': 0.025648, 'activity': 0.0916}
Sent: {'country': 'Afghanistan', 'iso_alpha3': 'AFG', 'year': 1952, 'carbon_emission': 0.031708, 'activity': 0.0916}
Sent: {'country': 'Afghanistan', 'iso_alpha3': 'AFG', 'year': 1953, 'carbon_emission': 0.037949, 'activity': 0.106256}
Sent: {'country': 'Afghanistan', 'iso_alpha3': 'AFG', 'year': 1954, 'carbon_emission': 0.042502, 'activity': 0.106256}
Sent: {'country': 'Afghanistan', 'iso_alpha3': 'AFG', 'year': 1955, 'carbon_emission': 0.062288, 'activity': 0.153888}
Sent: {'country': 'Afghanistan', 'iso_alpha3': 'AFG', 'year': 1956, 'carbon_emission': 0.062288, 'activity': 0.1832}
Sent: {'country': 'Afghanistan', 'iso_alpha3': 'AFG', 

KeyboardInterrupt: 

In [4]:
! pip install kafka-python gym numpy tensorflow




In [4]:
import numpy as np
import gym
from gym import spaces

class CarbonEmissionsEnv(gym.Env):
    def __init__(self, kafka_consumer):
        super(CarbonEmissionsEnv, self).__init__()
        self.kafka_consumer = kafka_consumer
        
        # Define action and observation space
        self.action_space = spaces.Discrete(3)  # Actions: 0 - Reduce coal, 1 - Promote renewables, 2 - Recommend policy
        self.observation_space = spaces.Box(low=0, high=np.inf, shape=(4,), dtype=np.float32)  # Example: Country, Year, Carbon, Activity
        
        # Initialize state
        self.state = None
        self.done = False
    
    def step(self, action):
        # Apply the chosen action and calculate reward
        reward = 0
        if action == 0:  # Reduce coal
            reward = -self.state[2] * 0.1
        elif action == 1:  # Promote renewables
            reward = -self.state[3] * 0.2
        elif action == 2:  # Recommend policy
            reward = -self.state[2] * 0.05
        
        # Simulate new state from Kafka
        self.state = self._get_new_state()
        
        # Check if episode is done
        self.done = self.state is None
        
        return self.state, reward, self.done, {}
    
    def reset(self):
        # Reset state using Kafka data
        self.state = self._get_new_state()
        return self.state
    
    def _get_new_state(self):
        # Fetch new data from Kafka
        for message in self.kafka_consumer:
            data = json.loads(message.value)
            return np.array([data['country'], data['year'], data['carbon_emission'], data['activity']])
        return None


In [6]:
import numpy as np

class CarbonEmissionsEnv:
    def __init__(self, consumer):
        self.consumer = consumer  # Kafka consumer instance
        self.current_state = None
        self.done = False

    def reset(self):
        # Define the initial state (e.g., initial emissions, activity levels, etc.)
        self.current_state = np.zeros(4)  # Example: [country, year, emission, activity]
        self.done = False
        return self.current_state

    def step(self, action):
        # Simulate environment dynamics based on the agent's action
        # For simplicity, you can use random data to mock state transitions and rewards
        next_state = self.current_state + np.random.rand(4)  # Mock transition
        reward = -1 if action == 0 else 1  # Mock reward system
        self.done = np.random.rand() > 0.95  # Mock end condition
        return next_state, reward, self.done, {}


In [8]:
from kafka import KafkaConsumer

consumer = KafkaConsumer(
    'carbonfootprint',
    bootstrap_servers=['localhost:9092'],
    auto_offset_reset='earliest',
    enable_auto_commit=True,
    value_deserializer=lambda x: json.loads(x.decode('utf-8'))
)


In [9]:
env = CarbonEmissionsEnv(consumer)


In [10]:
import numpy as np
import tensorflow as tf
import random
from collections import deque


2025-03-19 15:28:19.930260: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [11]:
import tensorflow as tf
from tensorflow.keras import models, layers
from collections import deque
import numpy as np
import random

# Define the DQNAgent class
class DQNAgent:
    def __init__(self, state_size, action_size):
        self.state_size = state_size
        self.action_size = action_size
        self.memory = deque(maxlen=2000)
        self.gamma = 0.95  # Discount factor
        self.epsilon = 1.0  # Exploration rate
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995
        self.learning_rate = 0.001
        self.model = self._build_model()
    
    def _build_model(self):
        model = models.Sequential([
            layers.Dense(24, input_dim=self.state_size, activation='relu'),
            layers.Dense(24, activation='relu'),
            layers.Dense(self.action_size, activation='linear')
        ])
        model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=self.learning_rate), loss='mse')
        return model
    
    def act(self, state):
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)
        q_values = self.model.predict(state, verbose=0)
        return np.argmax(q_values[0])
    
    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))
    
    def replay(self, batch_size):
        if len(self.memory) < batch_size:
            return
        minibatch = random.sample(self.memory, batch_size)
        for state, action, reward, next_state, done in minibatch:
            target = reward
            if not done:
                target += self.gamma * np.amax(self.model.predict(next_state, verbose=0)[0])
            target_f = self.model.predict(state, verbose=0)
            target_f[0][action] = target
            self.model.fit(state, target_f, epochs=1, verbose=0)
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

# Define the environment class (for simplicity, replace with your environment)
class CarbonEmissionsEnv:
    def __init__(self, consumer):
        self.consumer = consumer
        self.state = None
    
    def reset(self):
        # Simulate environment reset
        self.state = np.random.rand(4)  # Replace with actual logic
        return self.state
    
    def step(self, action):
        # Simulate environment step
        reward = np.random.rand()  # Replace with actual reward logic
        next_state = np.random.rand(4)  # Replace with actual next state logic
        done = np.random.rand() < 0.1  # End episode randomly (10% chance)
        return next_state, reward, done, {}

# Initialize environment and agent
state_size = 4
action_size = 3
env = CarbonEmissionsEnv(None)  # Replace `None` with your Kafka consumer
agent = DQNAgent(state_size, action_size)

# Train the agent
episodes = 100
for e in range(episodes):
    state = env.reset()
    state = np.reshape(state, [1, state_size])
    for time in range(500):
        action = agent.act(state)
        next_state, reward, done, _ = env.step(action)
        next_state = np.reshape(next_state, [1, state_size])
        agent.remember(state, action, reward, next_state, done)
        state = next_state
        if done:
            print(f"episode: {e}/{episodes}, score: {time}")
            break
    # Replay experiences
    agent.replay(32)


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


episode: 0/100, score: 0
episode: 1/100, score: 1
episode: 2/100, score: 12
episode: 3/100, score: 36
episode: 4/100, score: 4
episode: 5/100, score: 2


KeyboardInterrupt: 

In [4]:
import tensorflow as tf
from tensorflow.keras import models, layers
from collections import deque
import numpy as np
import random

# Define the DQNAgent class
class DQNAgent:
    def __init__(self, state_size, action_size):
        self.state_size = state_size
        self.action_size = action_size
        self.memory = deque(maxlen=2000)
        self.gamma = 0.95  # Discount factor
        self.epsilon = 1.0  # Exploration rate
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995
        self.learning_rate = 0.001
        self.reward_threshold = 10  # Initial reward threshold
        self.model = self._build_model()
        self.optimizations = []

    def _build_model(self):
        model = models.Sequential([
            layers.Dense(24, input_dim=self.state_size, activation='relu'),
            layers.Dense(24, activation='relu'),
            layers.Dense(self.action_size, activation='linear')
        ])
        model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=self.learning_rate), loss='mse')
        return model

    def act(self, state):
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)
        q_values = self.model.predict(state, verbose=0)
        return np.argmax(q_values[0])

    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))

    def replay(self, batch_size):
        if len(self.memory) < batch_size:
            return
        minibatch = random.sample(self.memory, batch_size)
        for state, action, reward, next_state, done in minibatch:
            target = reward
            if not done:
                target += self.gamma * np.amax(self.model.predict(next_state, verbose=0)[0])
            target_f = self.model.predict(state, verbose=0)
            target_f[0][action] = target
            self.model.fit(state, target_f, epochs=1, verbose=0)

        # Adjust epsilon dynamically
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

    def adjust_parameters(self, avg_reward):
        """Adjusts learning parameters based on performance."""
        if avg_reward > self.reward_threshold:
            self.reward_threshold += 5
            self.epsilon_decay *= 0.99
            self.learning_rate *= 1.05
            self.model.optimizer.learning_rate.assign(self.learning_rate)
            self.optimizations.append(f"Adjusted reward threshold to {self.reward_threshold}, epsilon_decay to {self.epsilon_decay:.4f}, learning_rate to {self.learning_rate:.4f}.")

# Define the environment class
class CarbonEmissionsEnv:
    def __init__(self, consumer):
        self.consumer = consumer
        self.state = None

    def reset(self):
        self.state = np.random.rand(4)  # Replace with actual logic
        return self.state

    def step(self, action):
        reward = np.random.rand() * (action + 1)  # Simulated reward logic
        next_state = np.random.rand(4)  # Replace with actual logic
        done = np.random.rand() < 0.1  # End episode randomly
        return next_state, reward, done, {}

# Initialize environment and agent
state_size = 4
action_size = 3
env = CarbonEmissionsEnv(None)
agent = DQNAgent(state_size, action_size)

# Train the agent
episodes = 100
successes = 0
for e in range(episodes):
    state = env.reset()
    state = np.reshape(state, [1, state_size])
    total_reward = 0
    for time in range(500):
        action = agent.act(state)
        next_state, reward, done, _ = env.step(action)
        next_state = np.reshape(next_state, [1, state_size])
        agent.remember(state, action, reward, next_state, done)
        state = next_state
        total_reward += reward
        if done:
            if total_reward > agent.reward_threshold:
                successes += 1
            print(f"episode: {e}/{episodes}, score: {time}, total_reward: {total_reward:.2f}")
            break
    agent.replay(32)
    agent.adjust_parameters(total_reward)

# Calculate accuracy
accuracy = (successes / episodes) * 100
print(f"Training Accuracy: {accuracy:.2f}%")

# Display optimizations
print("Optimizations:")
for optimization in agent.optimizations:
    print(optimization)


2025-03-19 01:39:31.464729: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


episode: 0/100, score: 5, total_reward: 6.81
episode: 1/100, score: 4, total_reward: 1.66
episode: 2/100, score: 3, total_reward: 4.45
episode: 3/100, score: 34, total_reward: 30.57


KeyboardInterrupt: 

In [12]:
import tensorflow as tf
from tensorflow.keras import models, layers
from collections import deque
import numpy as np
import random

# Define the DQNAgent class
class DQNAgent:
    def __init__(self, state_size, action_size):
        self.state_size = state_size
        self.action_size = action_size
        self.memory = deque(maxlen=2000)
        self.gamma = 0.95  # Discount factor
        self.epsilon = 1.0  # Exploration rate
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995
        self.learning_rate = 0.001
        self.model = self._build_model()
        self.performance_history = []

    def _build_model(self):
        model = models.Sequential([
            layers.Dense(24, input_dim=self.state_size, activation='relu'),
            layers.Dense(24, activation='relu'),
            layers.Dense(self.action_size, activation='linear')
        ])
        model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=self.learning_rate), loss='mse')
        return model

    def act(self, state):
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)
        q_values = self.model.predict(state, verbose=0)
        return np.argmax(q_values[0])

    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))

    def replay(self, batch_size):
        if len(self.memory) < batch_size:
            return
        minibatch = random.sample(self.memory, batch_size)
        for state, action, reward, next_state, done in minibatch:
            target = reward
            if not done:
                target += self.gamma * np.amax(self.model.predict(next_state, verbose=0)[0])
            target_f = self.model.predict(state, verbose=0)
            target_f[0][action] = target
            self.model.fit(state, target_f, epochs=1, verbose=0)
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

    def monitor_and_optimize(self, recent_rewards):
        # Monitor performance
        avg_reward = np.mean(recent_rewards)
        self.performance_history.append(avg_reward)
        
        # Dynamically adjust parameters based on performance
        optimizations = []
        if avg_reward > np.mean(self.performance_history[:-1]):
            if self.epsilon_decay > 0.990:
                self.epsilon_decay -= 0.002  # Gradually reduce decay
                optimizations.append(f"Reduced epsilon_decay to {self.epsilon_decay:.4f}")
            if self.learning_rate > 0.0005:
                self.learning_rate -= 0.0001  # Gradually reduce learning rate
                optimizations.append(f"Reduced learning_rate to {self.learning_rate:.6f}")
        else:
            if self.epsilon_decay < 0.999:
                self.epsilon_decay += 0.001  # Gradually increase decay for stability
                optimizations.append(f"Increased epsilon_decay to {self.epsilon_decay:.4f}")
            if self.learning_rate < 0.001:
                self.learning_rate += 0.00005  # Gradually increase learning rate
                optimizations.append(f"Increased learning_rate to {self.learning_rate:.6f}")
        
        return optimizations

# Define the environment class
class CarbonEmissionsEnv:
    def __init__(self):
        self.state = None
    
    def reset(self):
        self.state = np.random.rand(4)  # Simulate environment reset
        return self.state
    
    def step(self, action):
        # Simulate environment step
        emission_reduction = action * 0.1  # Example: action scales emission reduction
        cost_penalty = (3 - action) * 0.05  # Example: cost penalty for less efficient actions
        efficiency_bonus = 0.2 if action == 2 else 0.0  # Reward for optimal actions
        reward = emission_reduction - cost_penalty + efficiency_bonus
        next_state = np.random.rand(4)
        done = np.random.rand() < 0.1  # End episode randomly (10% chance)
        return next_state, reward, done, {}

# Initialize environment and agent
state_size = 4
action_size = 3
env = CarbonEmissionsEnv()
agent = DQNAgent(state_size, action_size)

# Train the agent
episodes = 100
batch_size = 32
reward_history = deque(maxlen=10)

for e in range(episodes):
    state = env.reset()
    state = np.reshape(state, [1, state_size])
    total_reward = 0
    for time in range(500):
        action = agent.act(state)
        next_state, reward, done, _ = env.step(action)
        next_state = np.reshape(next_state, [1, state_size])
        agent.remember(state, action, reward, next_state, done)
        state = next_state
        total_reward += reward
        if done:
            break
    reward_history.append(total_reward)
    optimizations = agent.monitor_and_optimize(reward_history)
    print(f"episode: {e}/{episodes}, score: {time}, total_reward: {total_reward:.2f}")
    if optimizations:
        print("Optimizations triggered:")
        for opt in optimizations:
            print(f"- {opt}")
    agent.replay(batch_size)


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


episode: 0/100, score: 21, total_reward: 2.20
Optimizations triggered:
- Increased epsilon_decay to 0.9960
episode: 1/100, score: 28, total_reward: 3.15
Optimizations triggered:
- Reduced epsilon_decay to 0.9940
- Reduced learning_rate to 0.000900
episode: 2/100, score: 5, total_reward: 0.60
Optimizations triggered:
- Increased epsilon_decay to 0.9950
- Increased learning_rate to 0.000950
episode: 3/100, score: 13, total_reward: 1.65
Optimizations triggered:
- Increased epsilon_decay to 0.9960
- Increased learning_rate to 0.001000
episode: 4/100, score: 11, total_reward: 1.95
Optimizations triggered:
- Increased epsilon_decay to 0.9970
episode: 5/100, score: 6, total_reward: -0.40
Optimizations triggered:
- Increased epsilon_decay to 0.9980
episode: 6/100, score: 26, total_reward: 0.55
Optimizations triggered:
- Increased epsilon_decay to 0.9990
episode: 7/100, score: 6, total_reward: 1.40
episode: 8/100, score: 3, total_reward: 0.05
episode: 9/100, score: 9, total_reward: 0.95
episode

In [2]:
! pip install --upgrade elasticsearch




In [3]:
import numpy as np
import tensorflow as tf
from elasticsearch import Elasticsearch
from sklearn.preprocessing import StandardScaler
import joblib

# Elasticsearch connection
es = Elasticsearch("http://localhost:9200")
index_name = "iot_sensor"

# Load the pre-trained model and scaler
best_model = joblib.load("/Users/kdn_aikothalavanya/Desktop/KPMG Projects/carbonfootprint/aqi_predictor_tuned.pkl")  # Replace with the path to your saved XGBoost model
scaler = joblib.load("/Users/kdn_aikothalavanya/Desktop/KPMG Projects/carbonfootprint/scaler.pkl")  # Replace with the path to your saved scaler

# Deep RL Agent class
class RLAgent:
    def __init__(self, state_size, action_size):
        self.state_size = state_size
        self.action_size = action_size
        self.model = self.build_model()

    def build_model(self):
        model = tf.keras.Sequential([
            tf.keras.layers.Dense(64, activation='relu', input_shape=(self.state_size,)),
            tf.keras.layers.Dense(64, activation='relu'),
            tf.keras.layers.Dense(self.action_size, activation='softmax')  # Action probabilities
        ])
        model.compile(optimizer='adam', loss='categorical_crossentropy')
        return model

    def predict_action(self, state):
        action_probs = self.model.predict(state, verbose=0)
        return np.argmax(action_probs)

    def train(self, state, action, reward):
        target = np.zeros((1, self.action_size))
        target[0, action] = reward
        self.model.fit(state, target, verbose=0)

# Fetch real-time data from Elasticsearch
def fetch_real_time_data(es_client, index):
    try:
        query = {"query": {"match_all": {}}}
        response = es_client.search(index=index, body=query, size=1)
        return response['hits']['hits'][0]['_source']
    except Exception as e:
        print(f"Error fetching data: {e}")
        return None

# Define feature columns used in the model
features = ["feature1", "feature2", "feature3", "feature4", "feature5", "feature6", "feature7", "feature8"]

# Main loop for RL agent interaction
agent = RLAgent(state_size=8, action_size=3)  # Adjust sizes based on features and actions

def main_loop():
    while True:
        sensor_data = fetch_real_time_data(es, index_name)
        if sensor_data:
            try:
                # Extract features and scale the state
                state = np.array([[sensor_data[col] for col in features]])
                scaled_state = scaler.transform(state)

                # Predict AQI using the pre-trained model
                predicted_aqi = best_model.predict(scaled_state)[0]

                # Determine action using the RL agent
                action = agent.predict_action(scaled_state)

                # Define reward based on AQI thresholds
                reward = 1 if predicted_aqi < 50 else -1

                # Train the RL agent
                agent.train(scaled_state, action, reward)

                print(f"State: {state}, Predicted AQI: {predicted_aqi}, Action: {action}, Reward: {reward}")

            except Exception as e:
                print(f"Error during processing: {e}")

if __name__ == "__main__":
    main_loop()


  response = es_client.search(index=index, body=query, size=1)


Error fetching data: ApiError(406, 'Content-Type header [application/vnd.elasticsearch+json; compatible-with=8] is not supported', 'Content-Type header [application/vnd.elasticsearch+json; compatible-with=8] is not supported')
Error fetching data: ApiError(406, 'Content-Type header [application/vnd.elasticsearch+json; compatible-with=8] is not supported', 'Content-Type header [application/vnd.elasticsearch+json; compatible-with=8] is not supported')
Error fetching data: ApiError(406, 'Content-Type header [application/vnd.elasticsearch+json; compatible-with=8] is not supported', 'Content-Type header [application/vnd.elasticsearch+json; compatible-with=8] is not supported')
Error fetching data: ApiError(406, 'Content-Type header [application/vnd.elasticsearch+json; compatible-with=8] is not supported', 'Content-Type header [application/vnd.elasticsearch+json; compatible-with=8] is not supported')
Error fetching data: ApiError(406, 'Content-Type header [application/vnd.elasticsearch+json;

KeyboardInterrupt: 

In [6]:
! pip install transformers pytorch pandas


Collecting transformers
  Using cached transformers-4.49.0-py3-none-any.whl.metadata (44 kB)
Collecting pytorch
  Using cached pytorch-1.0.2.tar.gz (689 bytes)
  Preparing metadata (setup.py) ... [?25ldone
[?25hCollecting pandas
  Using cached pandas-2.2.3-cp39-cp39-macosx_10_9_x86_64.whl.metadata (89 kB)
Collecting filelock (from transformers)
  Using cached filelock-3.18.0-py3-none-any.whl.metadata (2.9 kB)
Collecting huggingface-hub<1.0,>=0.26.0 (from transformers)
  Using cached huggingface_hub-0.29.3-py3-none-any.whl.metadata (13 kB)
Collecting pyyaml>=5.1 (from transformers)
  Using cached PyYAML-6.0.2-cp39-cp39-macosx_10_9_x86_64.whl.metadata (2.1 kB)
Collecting regex!=2019.12.17 (from transformers)
  Using cached regex-2024.11.6-cp39-cp39-macosx_10_9_x86_64.whl.metadata (40 kB)
Collecting tokenizers<0.22,>=0.21 (from transformers)
  Using cached tokenizers-0.21.1-cp39-abi3-macosx_10_12_x86_64.whl.metadata (6.8 kB)
Collecting safetensors>=0.4.1 (from transformers)
  Using cach

In [8]:
! pip install pandas

Collecting pandas
  Using cached pandas-2.2.3-cp39-cp39-macosx_10_9_x86_64.whl.metadata (89 kB)
Collecting pytz>=2020.1 (from pandas)
  Using cached pytz-2025.1-py2.py3-none-any.whl.metadata (22 kB)
Collecting tzdata>=2022.7 (from pandas)
  Using cached tzdata-2025.1-py2.py3-none-any.whl.metadata (1.4 kB)
Using cached pandas-2.2.3-cp39-cp39-macosx_10_9_x86_64.whl (12.6 MB)
Using cached pytz-2025.1-py2.py3-none-any.whl (507 kB)
Using cached tzdata-2025.1-py2.py3-none-any.whl (346 kB)
Installing collected packages: pytz, tzdata, pandas
Successfully installed pandas-2.2.3 pytz-2025.1 tzdata-2025.1


In [10]:
! pip install scikit-learn

Collecting scikit-learn
  Downloading scikit_learn-1.6.1-cp39-cp39-macosx_10_9_x86_64.whl.metadata (31 kB)
Collecting scipy>=1.6.0 (from scikit-learn)
  Downloading scipy-1.13.1-cp39-cp39-macosx_10_9_x86_64.whl.metadata (60 kB)
Collecting joblib>=1.2.0 (from scikit-learn)
  Using cached joblib-1.4.2-py3-none-any.whl.metadata (5.4 kB)
Collecting threadpoolctl>=3.1.0 (from scikit-learn)
  Using cached threadpoolctl-3.6.0-py3-none-any.whl.metadata (13 kB)
Downloading scikit_learn-1.6.1-cp39-cp39-macosx_10_9_x86_64.whl (12.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.1/12.1 MB[0m [31m4.2 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hUsing cached joblib-1.4.2-py3-none-any.whl (301 kB)
Downloading scipy-1.13.1-cp39-cp39-macosx_10_9_x86_64.whl (39.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m39.4/39.4 MB[0m [31m4.2 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hUsing cached threadpoolctl-3.6.0-py3-none-any.whl (18 kB)
Install

In [12]:
! pip install transformers

Collecting transformers
  Using cached transformers-4.49.0-py3-none-any.whl.metadata (44 kB)
Collecting filelock (from transformers)
  Using cached filelock-3.18.0-py3-none-any.whl.metadata (2.9 kB)
Collecting huggingface-hub<1.0,>=0.26.0 (from transformers)
  Using cached huggingface_hub-0.29.3-py3-none-any.whl.metadata (13 kB)
Collecting pyyaml>=5.1 (from transformers)
  Using cached PyYAML-6.0.2-cp39-cp39-macosx_10_9_x86_64.whl.metadata (2.1 kB)
Collecting regex!=2019.12.17 (from transformers)
  Using cached regex-2024.11.6-cp39-cp39-macosx_10_9_x86_64.whl.metadata (40 kB)
Collecting tokenizers<0.22,>=0.21 (from transformers)
  Using cached tokenizers-0.21.1-cp39-abi3-macosx_10_12_x86_64.whl.metadata (6.8 kB)
Collecting safetensors>=0.4.1 (from transformers)
  Using cached safetensors-0.5.3-cp38-abi3-macosx_10_12_x86_64.whl.metadata (3.8 kB)
Collecting tqdm>=4.27 (from transformers)
  Using cached tqdm-4.67.1-py3-none-any.whl.metadata (57 kB)
Collecting fsspec>=2023.5.0 (from huggin

In [2]:
! pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu


Looking in indexes: https://download.pytorch.org/whl/cpu


In [1]:
! pip install -U transformers




In [6]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import PolynomialFeatures
import joblib

# Simulate a dataset resembling your real-time monitoring data
# Example fields: ['year', 'carbon_emission', 'activity']
data = {
    'year': np.random.randint(2000, 2025, size=100),  # Random years between 2000 and 2025
    'carbon_emission': np.random.uniform(0.1, 500.0, size=100),  # Random emissions between 0.1 and 500.0
    'activity': np.random.uniform(0.1, 1000.0, size=100),  # Random activity levels
}

# Create a DataFrame
df = pd.DataFrame(data)

# Define features to use for polynomial transformation
features = ['year', 'carbon_emission', 'activity']

# Prepare the PolynomialFeatures transformer
poly = PolynomialFeatures(degree=2, include_bias=False)

# Fit and transform the features
poly.fit(df[features])

# Save the transformer
joblib.dump(poly, "poly_transform.pkl")

print("PolynomialFeatures transformer has been saved as 'poly_transform.pkl'")


PolynomialFeatures transformer has been saved as 'poly_transform.pkl'


In [12]:
! pip install transformers datasets torch


Collecting datasets
  Downloading datasets-3.4.1-py3-none-any.whl.metadata (19 kB)
Collecting pyarrow>=15.0.0 (from datasets)
  Downloading pyarrow-19.0.1-cp310-cp310-macosx_12_0_x86_64.whl.metadata (3.3 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp310-cp310-macosx_10_9_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py310-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2024.12.0,>=2023.1.0 (from fsspec[http]<=2024.12.0,>=2023.1.0->datasets)
  Downloading fsspec-2024.12.0-py3-none-any.whl.metadata (11 kB)
Collecting aiohttp (from datasets)
  Downloading aiohttp-3.11.14-cp310-cp310-macosx_10_9_x86_64.whl.metadata (7.7 kB)
Collecting aiohappyeyeballs>=2.3.0 (from aiohttp->datasets)
  Downloading aiohappyeyeballs-2.6.1-py3-none-any.whl.metadata (5.9 kB)
Collecting aiosignal>=1.1.2 (from aiohttp->data

In [13]:
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import torch

# Load tokenizer and model
model_name = "gpt2"  # You can replace this with a larger model if needed (like GPT-Neo)
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# Enable CUDA if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

print(f"Model loaded on {device}.")


  from .autonotebook import tqdm as notebook_tqdm


Model loaded on cpu.


In [29]:
from kafka import KafkaProducer
import pandas as pd
import json
import time
import pycountry

# Initialize Kafka Producer
producer = KafkaProducer(
    bootstrap_servers=['localhost:9092'],
    value_serializer=lambda v: json.dumps(v).encode('utf-8')
)

# Function to map country names to ISO 3166-1 alpha-3 codes
def get_iso_alpha3(country_name):
    try:
        return pycountry.countries.lookup(country_name).alpha_3
    except LookupError:
        return None

# Function to process CSV and prepare data
def process_csv(file_path, countries_with_limit):
    try:
        # Read CSV
        df = pd.read_csv(file_path)

        # Ensure required columns are present
        required_columns = ['Country', 'Year', 'Coal', 'Total']
        if not all(col in df.columns for col in required_columns):
            raise ValueError(f"CSV file must contain the following columns: {required_columns}")

        # Filter rows where 'Coal' > 0
        df = df[df['Coal'] > 0]

        # Add ISO 3166-1 alpha-3 codes
        df['ISO_alpha3'] = df['Country'].apply(get_iso_alpha3)

        # Drop rows with missing ISO alpha-3 codes
        df = df.dropna(subset=['ISO_alpha3'])

        # Filter data for specific countries and limits
        filtered_data = []
        for country, limit in countries_with_limit.items():
            country_data = df[df['Country'] == country].head(limit)
            filtered_data.append(country_data)

        # Concatenate filtered data
        result_df = pd.concat(filtered_data)

        # Yield each row as a dictionary
        for _, row in result_df.iterrows():
            yield {
                "country": row['Country'],
                "iso_alpha3": row['ISO_alpha3'],
                "year": int(row['Year']),
                "carbon_emission": float(row['Coal']),
                "activity": row['Total']
            }
    except Exception as e:
        print(f"Error processing CSV: {e}")
        return []

# Main block to read CSV and send data to Kafka
try:
    file_path = "/Users/kdn_aikothalavanya/Downloads/GCB2022v27_MtCO2_flat.csv"

    # Specify countries and limits
    countries_with_limit = {
        "Afghanistan": 5,
        "Albania": 5,
        "India": 5,
        "Germany": 5
    }

    for data in process_csv(file_path, countries_with_limit):
        future = producer.send('carbonfootprint', data)
        print(f"Sent: {data}")
        time.sleep(0.5)  # Throttle to avoid overwhelming Kafka
except Exception as e:
    print(f"Error sending data: {e}")
finally:
    producer.close()

Sent: {'country': 'Afghanistan', 'iso_alpha3': 'AFG', 'year': 1949, 'carbon_emission': 0.014656, 'activity': 0.014656}
Sent: {'country': 'Afghanistan', 'iso_alpha3': 'AFG', 'year': 1950, 'carbon_emission': 0.021068, 'activity': 0.084272}
Sent: {'country': 'Afghanistan', 'iso_alpha3': 'AFG', 'year': 1951, 'carbon_emission': 0.025648, 'activity': 0.0916}
Sent: {'country': 'Afghanistan', 'iso_alpha3': 'AFG', 'year': 1952, 'carbon_emission': 0.031708, 'activity': 0.0916}
Sent: {'country': 'Afghanistan', 'iso_alpha3': 'AFG', 'year': 1953, 'carbon_emission': 0.037949, 'activity': 0.106256}
Sent: {'country': 'Albania', 'iso_alpha3': 'ALB', 'year': 1946, 'carbon_emission': 0.021984, 'activity': 0.483648}
Sent: {'country': 'Albania', 'iso_alpha3': 'ALB', 'year': 1948, 'carbon_emission': 0.007328, 'activity': 0.703488}
Sent: {'country': 'Albania', 'iso_alpha3': 'ALB', 'year': 1950, 'carbon_emission': 0.043432, 'activity': 0.296725}
Sent: {'country': 'Albania', 'iso_alpha3': 'ALB', 'year': 1951, 

In [18]:
! pip install elasticsearch

Collecting elasticsearch
  Downloading elasticsearch-8.17.2-py3-none-any.whl.metadata (8.8 kB)
Collecting elastic-transport<9,>=8.15.1 (from elasticsearch)
  Downloading elastic_transport-8.17.1-py3-none-any.whl.metadata (3.8 kB)
Downloading elasticsearch-8.17.2-py3-none-any.whl (717 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m718.0/718.0 kB[0m [31m5.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading elastic_transport-8.17.1-py3-none-any.whl (64 kB)
Installing collected packages: elastic-transport, elasticsearch
Successfully installed elastic-transport-8.17.1 elasticsearch-8.17.2


In [19]:
from elasticsearch import Elasticsearch
import pandas as pd
import json
import time
import pycountry

# Initialize Elasticsearch Client
es = Elasticsearch("http://localhost:9200")  # Replace with your Elasticsearch URL if hosted elsewhere

# Function to map country names to ISO 3166-1 alpha-3 codes
def get_iso_alpha3(country_name):
    try:
        return pycountry.countries.lookup(country_name).alpha_3
    except LookupError:
        return None

# Function to process CSV and prepare data
def process_csv(file_path, countries_with_limit):
    try:
        # Read CSV
        df = pd.read_csv(file_path)

        # Ensure required columns are present
        required_columns = ['Country', 'Year', 'Coal', 'Total']
        if not all(col in df.columns for col in required_columns):
            raise ValueError(f"CSV file must contain the following columns: {required_columns}")

        # Filter rows where 'Coal' > 0
        df = df[df['Coal'] > 0]

        # Add ISO 3166-1 alpha-3 codes
        df['ISO_alpha3'] = df['Country'].apply(get_iso_alpha3)

        # Drop rows with missing ISO alpha-3 codes
        df = df.dropna(subset=['ISO_alpha3'])

        # Filter data for specific countries and limits
        filtered_data = []
        for country, limit in countries_with_limit.items():
            country_data = df[df['Country'] == country].head(limit)
            filtered_data.append(country_data)

        # Concatenate filtered data
        result_df = pd.concat(filtered_data)

        # Yield each row as a dictionary
        for _, row in result_df.iterrows():
            yield {
                "country": row['Country'],
                "iso_alpha3": row['ISO_alpha3'],
                "year": int(row['Year']),
                "carbon_emission": float(row['Coal']),
                "activity": row['Total']
            }
    except Exception as e:
        print(f"Error processing CSV: {e}")
        return []

# Function to index data in Elasticsearch
def index_to_elasticsearch(index_name, data):
    try:
        # Index the data into Elasticsearch
        response = es.index(index=index_name, body=data)
        print(f"Document indexed. ID: {response['_id']}")
    except Exception as e:
        print(f"Error indexing document: {e}")

# Main block to read CSV and send data to Elasticsearch
try:
    file_path = "/Users/kdn_aikothalavanya/Downloads/GCB2022v27_MtCO2_flat.csv"

    # Specify countries and limits
    countries_with_limit = {
        "Afghanistan": 5,
        "Albania": 5,
        "India": 5,
        "Germany": 5
    }

    index_name = "carbonfootprint"  # Elasticsearch index name

    # Create index if it doesn't exist
    if not es.indices.exists(index=index_name):
        es.indices.create(index=index_name)
        print(f"Created Elasticsearch index: {index_name}")

    # Process CSV and send data to Elasticsearch
    for data in process_csv(file_path, countries_with_limit):
        index_to_elasticsearch(index_name, data)
        time.sleep(0.5)  # Throttle to avoid overwhelming Elasticsearch
except Exception as e:
    print(f"Error sending data: {e}")
finally:
    es.close()


Error sending data: Connection error caused by: ConnectionError(Connection error caused by: NewConnectionError(<urllib3.connection.HTTPConnection object at 0x151c82080>: Failed to establish a new connection: [Errno 61] Connection refused))


In [6]:
from kafka import KafkaProducer
import pandas as pd
import json
import time

# Initialize Kafka Producer
producer = KafkaProducer(
    bootstrap_servers=['localhost:9092'],
    value_serializer=lambda v: json.dumps(v).encode('utf-8')
)

# Function to process CSV and prepare data
def process_csv(file_path):
    try:
        # Read CSV
        df = pd.read_csv(file_path)
        
        # Ensure required columns are present
        required_columns = [
            'Timestamp', 'CO2_ppm', 'NO2_ppm', 'SO2_ppm', 
            'Temperature_C', 'Humidity_%', 'AQI', 
            'Environmental_Score', 'Social_Score', 'Governance_Score'
        ]
        if not all(col in df.columns for col in required_columns):
            raise ValueError(f"CSV file must contain the following columns: {required_columns}")
        
        # Convert Timestamp to string if not already
        df['Timestamp'] = df['Timestamp'].astype(str)
        
        # Yield each row as a dictionary
        for _, row in df.iterrows():
            yield {
                "timestamp": row['Timestamp'],
                "co2_ppm": float(row['CO2_ppm']),
                "no2_ppm": float(row['NO2_ppm']),
                "so2_ppm": float(row['SO2_ppm']),
                "temperature_c": float(row['Temperature_C']),
                "humidity_percent": float(row['Humidity_%']),
                "aqi": int(row['AQI']),
                "environmental_score": float(row['Environmental_Score']),
                "social_score": float(row['Social_Score']),
                "governance_score": float(row['Governance_Score'])
            }
    except Exception as e:
        print(f"Error processing CSV: {e}")
        return []

# Main block to send data to Kafka topic
def send_to_kafka(file_path):
    try:
        for data in process_csv(file_path):
            producer.send('environmental_data', data)
            print(f"Sent: {data}")
            time.sleep(0.5)  # Throttle to avoid overwhelming Kafka
    except Exception as e:
        print(f"Error sending data: {e}")
    finally:
        producer.close()

if __name__ == "__main__":
    file_path = "/Users/kdn_aikothalavanya/Desktop/KPMG Projects/carbonfootprint/real_time_monitoring_data.csv"
    send_to_kafka(file_path)


Sent: {'timestamp': '2025-03-22 22:18:49', 'co2_ppm': 548.606014687361, 'no2_ppm': 9.916281225491558, 'so2_ppm': 1.0403628061379906, 'temperature_c': 11.633600152610738, 'humidity_percent': 25.74550592759281, 'aqi': 5, 'environmental_score': 72.6752611103381, 'social_score': 68.55838175232356, 'governance_score': 73.97769473652139}
Sent: {'timestamp': '2025-03-22 22:22:25', 'co2_ppm': 4042.507826442367, 'no2_ppm': 3.8811434858951226, 'so2_ppm': 2.17174804405351, 'temperature_c': 10.892209271710792, 'humidity_percent': 21.947806541725253, 'aqi': 3, 'environmental_score': 56.73257793373792, 'social_score': 85.43385040728906, 'governance_score': 83.41359404561356}
Sent: {'timestamp': '2025-03-22 22:22:28', 'co2_ppm': 2113.433202346877, 'no2_ppm': 9.487869562551154, 'so2_ppm': 13.260369291039805, 'temperature_c': 14.98161692420653, 'humidity_percent': 70.22854343990032, 'aqi': 11, 'environmental_score': 86.87958824345276, 'social_score': 93.98969293653772, 'governance_score': 94.1024090057

In [38]:
from kafka import KafkaProducer
import pandas as pd
import json
import time

# Initialize Kafka Producer
producer = KafkaProducer(
    bootstrap_servers=['localhost:9092'],
    value_serializer=lambda v: json.dumps(v).encode('utf-8')
)

# Function to process CSV and prepare data
def process_csv(file_path):
    try:
        # Read CSV
        df = pd.read_csv(file_path)
        
        # Ensure required columns are present
        required_columns = [
            'Timestamp', 'CO2_ppm', 'NO2_ppm', 'SO2_ppm', 
            'Temperature_C', 'Humidity_%', 'AQI', 
            'Environmental_Score', 'Social_Score', 'Governance_Score'
        ]
        if not all(col in df.columns for col in required_columns):
            raise ValueError(f"CSV file must contain the following columns: {required_columns}")
        
        # Convert Timestamp to string if not already
        df['Timestamp'] = df['Timestamp'].astype(str)
        
        # Yield each row as a dictionary
        for _, row in df.iterrows():
            yield {
                "timestamp": row['Timestamp'],
                "co2_ppm": float(row['CO2_ppm']),
                "no2_ppm": float(row['NO2_ppm']),
                "so2_ppm": float(row['SO2_ppm']),
                "temperature_c": float(row['Temperature_C']),
                "humidity_percent": float(row['Humidity_%']),
                "aqi": int(row['AQI']),
                "environmental_score": float(row['Environmental_Score']),
                "social_score": float(row['Social_Score']),
                "governance_score": float(row['Governance_Score'])
            }
    except Exception as e:
        print(f"Error processing CSV: {e}")
        return []

# Main block to send data to Kafka topic
def send_to_kafka(file_path):
    try:
        print("Reading and sending data to Kafka...")
        for data in process_csv(file_path):
            producer.send('carbonfootprint', data)
            print(f"Sent: {data}")
            time.sleep(0.5)  # Throttle to avoid overwhelming Kafka
    except Exception as e:
        print(f"Error sending data: {e}")
    finally:
        producer.close()

if __name__ == "__main__":
    file_path = "/Users/kdn_aikothalavanya/Desktop/KPMG Projects/carbonfootprint/real_time_monitoring_data.csv"
    send_to_kafka(file_path)


Reading and sending data to Kafka...
Sent: {'timestamp': '2025-03-22 22:18:49', 'co2_ppm': 548.606014687361, 'no2_ppm': 9.916281225491558, 'so2_ppm': 1.0403628061379906, 'temperature_c': 11.633600152610738, 'humidity_percent': 25.74550592759281, 'aqi': 5, 'environmental_score': 72.6752611103381, 'social_score': 68.55838175232356, 'governance_score': 73.97769473652139}
Sent: {'timestamp': '2025-03-22 22:22:25', 'co2_ppm': 4042.507826442367, 'no2_ppm': 3.8811434858951226, 'so2_ppm': 2.17174804405351, 'temperature_c': 10.892209271710792, 'humidity_percent': 21.947806541725253, 'aqi': 3, 'environmental_score': 56.73257793373792, 'social_score': 85.43385040728906, 'governance_score': 83.41359404561356}
Sent: {'timestamp': '2025-03-22 22:22:28', 'co2_ppm': 2113.433202346877, 'no2_ppm': 9.487869562551154, 'so2_ppm': 13.260369291039805, 'temperature_c': 14.98161692420653, 'humidity_percent': 70.22854343990032, 'aqi': 11, 'environmental_score': 86.87958824345276, 'social_score': 93.989692936537

In [39]:
from kafka import KafkaConsumer
import json

# Initialize Kafka Consumer
consumer = KafkaConsumer(
    'carbonfootprint',
    bootstrap_servers=['localhost:9092'],
    value_deserializer=lambda v: json.loads(v.decode('utf-8')),
    auto_offset_reset='earliest',  # Start reading from the beginning
    enable_auto_commit=True,
    group_id='carbonfootprint_group'
)

def consume_from_kafka():
    try:
        print("Consuming messages from Kafka...")
        for message in consumer:
            data = message.value
            print(f"Received: {data}")
    except KeyboardInterrupt:
        print("Stopping consumer...")
    finally:
        consumer.close()

if __name__ == "__main__":
    consume_from_kafka()


Consuming messages from Kafka...
Stopping consumer...


In [5]:
import numpy as np
import gym
from gym import spaces

class IoTEnvironment(gym.Env):
    def __init__(self, sensor_data):
        super(IoTEnvironment, self).__init__()
        self.sensor_data = sensor_data
        self.current_step = 0

        # Define observation and action spaces
        self.observation_space = spaces.Box(low=np.min(sensor_data), high=np.max(sensor_data), shape=(len(sensor_data[0]),), dtype=np.float32)
        self.action_space = spaces.Discrete(2)  # Example: 0 = Normal, 1 = Alert

    def step(self, action):
        # Apply the action and calculate the reward
        done = self.current_step >= len(self.sensor_data) - 1
        reward = 1 if (action == 1 and self.is_anomaly()) else 0
        self.current_step += 1

        return self.sensor_data[self.current_step], reward, done, {}

    def reset(self):
        self.current_step = 0
        return self.sensor_data[self.current_step]

    def is_anomaly(self):
        # Example logic for anomaly detection
        return np.random.random() < 0.1  # Replace with domain-specific logic

# Example usage:
# sensor_data = np.array([doc["_source"]["sensor_value"] for doc in retrieved_data])  # Extract IoT sensor values
# env = IoTEnvironment(sensor_data)


In [7]:
! pip install stable_baselines3

Collecting stable_baselines3
  Using cached stable_baselines3-2.5.0-py3-none-any.whl.metadata (4.8 kB)
Collecting gymnasium<1.1.0,>=0.29.1 (from stable_baselines3)
  Using cached gymnasium-1.0.0-py3-none-any.whl.metadata (9.5 kB)
INFO: pip is looking at multiple versions of stable-baselines3 to determine which version is compatible with other requirements. This could take a while.
Collecting stable_baselines3
  Using cached stable_baselines3-2.4.1-py3-none-any.whl.metadata (4.5 kB)
Collecting matplotlib (from stable_baselines3)
  Downloading matplotlib-3.10.1-cp310-cp310-macosx_10_12_x86_64.whl.metadata (11 kB)
Collecting farama-notifications>=0.0.1 (from gymnasium<1.1.0,>=0.29.1->stable_baselines3)
  Using cached Farama_Notifications-0.0.4-py3-none-any.whl.metadata (558 bytes)
Collecting contourpy>=1.0.1 (from matplotlib->stable_baselines3)
  Downloading contourpy-1.3.1-cp310-cp310-macosx_10_9_x86_64.whl.metadata (5.4 kB)
Collecting cycler>=0.10 (from matplotlib->stable_baselines3)
  

In [11]:
! pip install --upgrade stable-baselines3 gymnasium


Collecting stable-baselines3
  Using cached stable_baselines3-2.5.0-py3-none-any.whl.metadata (4.8 kB)
Collecting gymnasium
  Downloading gymnasium-1.1.1-py3-none-any.whl.metadata (9.4 kB)
INFO: pip is looking at multiple versions of stable-baselines3 to determine which version is compatible with other requirements. This could take a while.


In [14]:
! pip install shimmy

Collecting shimmy
  Using cached Shimmy-2.0.0-py3-none-any.whl.metadata (3.5 kB)
Using cached Shimmy-2.0.0-py3-none-any.whl (30 kB)
Installing collected packages: shimmy
Successfully installed shimmy-2.0.0


In [29]:
! pip install gymnasium




In [23]:
from elasticsearch import Elasticsearch
import pandas as pd
from stable_baselines3 import PPO
from stable_baselines3.common.env_util import make_vec_env
import gym
import numpy as np

# Elasticsearch connection
es = Elasticsearch("http://localhost:9200")  # Replace with your Elasticsearch host and port
index_name = "iot_sensor"  # Replace with your index name

def retrieve_data(es_client, index, size=1000):
    """
    Retrieves documents from the specified Elasticsearch index.

    :param es_client: Elasticsearch client instance.
    :param index: Index name to query.
    :param size: Number of documents to retrieve.
    :return: Retrieved documents as a pandas DataFrame.
    """
    try:
        # Query to fetch all documents
        query = {
            "query": {
                "match_all": {}
            }
        }
        response = es_client.search(index=index, body=query, size=size)
        documents = response.get("hits", {}).get("hits", [])
        if documents:
            print(f"Retrieved {len(documents)} documents from index '{index}'.")
            data = [doc["_source"] for doc in documents]
            return pd.DataFrame(data)
        else:
            print(f"No data found in index '{index}'.")
            return pd.DataFrame()
    except Exception as e:
        print(f"Error retrieving data: {e}")
        return pd.DataFrame()

class CarbonFootprintEnv(gym.Env):
    """
    Custom environment for monitoring and optimizing carbon footprint.
    """
    def __init__(self, data):
        super(CarbonFootprintEnv, self).__init__()
        self.data = data
        self.observation_space = gym.spaces.Box(
            low=0, high=1, shape=(data.shape[1],), dtype=np.float32
        )
        self.action_space = gym.spaces.Discrete(3)  # Actions: reduce, maintain, increase
        self.current_step = 0

    def seed(self, seed=None):
        """
        Sets the seed for the environment's random number generator.

        :param seed: Seed value.
        """
        np.random.seed(seed)

    def reset(self):
        self.current_step = 0
        return self.data.iloc[self.current_step].values

    def step(self, action):
        reward = 0
        done = False
        if action == 0:  # Reduce emissions
            reward = -self.data.iloc[self.current_step]["CO2_ppm"]
        elif action == 1:  # Maintain
            reward = -self.data.iloc[self.current_step]["CO2_ppm"] * 0.5
        elif action == 2:  # Increase (penalty)
            reward = -self.data.iloc[self.current_step]["CO2_ppm"] * 1.5

        self.current_step += 1
        if self.current_step >= len(self.data):
            done = True
        obs = self.data.iloc[self.current_step].values if not done else np.zeros(self.data.shape[1])
        return obs, reward, done, {}

    def render(self, mode="human"):
        print(f"Step: {self.current_step}")

if __name__ == "__main__":
    # Retrieve data from Elasticsearch
    print(f"Retrieving data from Elasticsearch index '{index_name}'...")
    sensor_data = retrieve_data(es, index_name)

    if not sensor_data.empty:
        # Fill missing values
        sensor_data.fillna(0, inplace=True)

        # Convert relevant columns to numeric
        numeric_columns = ["CO2_ppm", "NO2_ppm", "SO2_ppm", "Environmental_Score", "Social_Score", "Governance_Score"]
        for col in numeric_columns:
            sensor_data[col] = pd.to_numeric(sensor_data[col], errors='coerce')

        # Handle NaN values
        sensor_data.fillna(0, inplace=True)

        # Normalize the data
        sensor_data = sensor_data[numeric_columns]
        sensor_data = (sensor_data - sensor_data.min()) / (sensor_data.max() - sensor_data.min())

        # Create and train the RL agent
        env = make_vec_env(lambda: CarbonFootprintEnv(sensor_data), n_envs=1)
        model = PPO("MlpPolicy", env, verbose=1)
        model.learn(total_timesteps=10000)

        # Evaluate the agent
        obs = env.reset()
        for _ in range(100):
            action, _ = model.predict(obs)
            obs, rewards, dones, _ = env.step(action)
            env.render()
    else:
        print("No data available to train the AI agent.")


Retrieving data from Elasticsearch index 'iot_sensor'...
Retrieved 200 documents from index 'iot_sensor'.
Using cpu device


  response = es_client.search(index=index, body=query, size=size)


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 200      |
|    ep_rew_mean     | -99.6    |
| time/              |          |
|    fps             | 1376     |
|    iterations      | 1        |
|    time_elapsed    | 1        |
|    total_timesteps | 2048     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 200         |
|    ep_rew_mean          | -97.2       |
| time/                   |             |
|    fps                  | 913         |
|    iterations           | 2           |
|    time_elapsed         | 4           |
|    total_timesteps      | 4096        |
| train/                  |             |
|    approx_kl            | 0.009565471 |
|    clip_fraction        | 0.123       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.09       |
|    explained_variance   | -0.0223     |
|    learning_rate        | 0.



In [31]:
! pip install gymnasium




In [34]:
import numpy as np
import pandas as pd
from gym import Env
from gym.spaces import Box
from stable_baselines3 import PPO

class IoTSensorEnv(Env):
    def __init__(self, data):
        super(IoTSensorEnv, self).__init__()
        self.data = data
        self.current_step = 0
        self.max_steps = len(data)

        # Define action and observation spaces
        self.action_space = Box(low=-1, high=1, shape=(1,), dtype=np.float32)  # Example: Adjust CO2 levels
        self.observation_space = Box(
            low=-np.inf, 
            high=np.inf, 
            shape=(data.shape[1],), 
            dtype=np.float32
        )

    def reset(self):
        self.current_step = 0
        return self.data.iloc[self.current_step].values.astype(np.float32)

    def step(self, action):
        # Get the current data row
        current_data = self.data.iloc[self.current_step]

        # Simulate the effect of the action
        co2 = current_data["CO2_ppm"]
        adjusted_co2 = co2 - action[0] * 10  # Example: Action reduces CO2

        # Reward inversely proportional to CO2 levels
        reward = -np.abs(adjusted_co2)

        # Move to the next step
        self.current_step += 1
        done = self.current_step >= self.max_steps

        # Observation for the next step
        if not done:
            obs = self.data.iloc[self.current_step].values.astype(np.float32)
        else:
            obs = np.zeros(self.data.shape[1], dtype=np.float32)

        return obs, reward, done, {}

    def render(self, mode="human"):
        if self.current_step < len(self.data):
            print(f"Step: {self.current_step}, CO2_ppm: {self.data.iloc[self.current_step]['CO2_ppm']}")

    def seed(self, seed=None):
        np.random.seed(seed)


# Example dataset
data_dict = {
    "Temperature": [22, 23, 21, 20, 22],
    "Humidity": [30, 35, 40, 45, 50],
    "CO2_ppm": [400, 420, 390, 410, 405]
}
data = pd.DataFrame(data_dict)

# Create the environment
env = IoTSensorEnv(data)

# Train the PPO agent
model = PPO("MlpPolicy", env, verbose=1)
model.learn(total_timesteps=10000)

# Test the trained agent
obs = env.reset()
for step in range(len(data)):
    action, _ = model.predict(obs)
    obs, reward, done, _ = env.step(action)
    env.render()
    if done:
        break


Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 5         |
|    ep_rew_mean     | -2.03e+03 |
| time/              |           |
|    fps             | 1745      |
|    iterations      | 1         |
|    time_elapsed    | 1         |
|    total_timesteps | 2048      |
----------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | -2.02e+03   |
| time/                   |             |
|    fps                  | 1241        |
|    iterations           | 2           |
|    time_elapsed         | 3           |
|    total_timesteps      | 4096        |
| train/                  |             |
|    approx_kl            | 0.003695074 |
|    clip_fraction        | 0.0169      |
|    clip_range           | 0.2         |
|    entro

In [35]:
from elasticsearch import Elasticsearch
from tensorflow.keras import models, layers
from collections import deque
import numpy as np
import random
import tensorflow as tf

# Elasticsearch connection
es = Elasticsearch("http://localhost:9200")
index_name = "iot_sensor"

# Function to retrieve IoT data from Elasticsearch
def retrieve_data(es_client, index):
    try:
        query = {"query": {"match_all": {}}}
        response = es_client.search(index=index, body=query, size=1000)
        data = [hit["_source"] for hit in response["hits"]["hits"]]
        print(f"Retrieved {len(data)} documents from index '{index}'.")
        return data
    except Exception as e:
        print(f"Error retrieving data: {e}")
        return []

# DQNAgent definition
class DQNAgent:
    def __init__(self, state_size, action_size):
        self.state_size = state_size
        self.action_size = action_size
        self.memory = deque(maxlen=2000)
        self.gamma = 0.95
        self.epsilon = 1.0
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995
        self.learning_rate = 0.001
        self.model = self._build_model()
        self.performance_history = []

    def _build_model(self):
        model = models.Sequential([
            layers.Dense(24, input_dim=self.state_size, activation='relu'),
            layers.Dense(24, activation='relu'),
            layers.Dense(self.action_size, activation='linear')
        ])
        model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=self.learning_rate), loss='mse')
        return model

    def act(self, state):
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)
        q_values = self.model.predict(state, verbose=0)
        return np.argmax(q_values[0])

    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))

    def replay(self, batch_size):
        if len(self.memory) < batch_size:
            return
        minibatch = random.sample(self.memory, batch_size)
        for state, action, reward, next_state, done in minibatch:
            target = reward
            if not done:
                target += self.gamma * np.amax(self.model.predict(next_state, verbose=0)[0])
            target_f = self.model.predict(state, verbose=0)
            target_f[0][action] = target
            self.model.fit(state, target_f, epochs=1, verbose=0)
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

    def monitor_and_optimize(self, recent_rewards):
        avg_reward = np.mean(recent_rewards)
        self.performance_history.append(avg_reward)
        optimizations = []
        if avg_reward > np.mean(self.performance_history[:-1]):
            if self.epsilon_decay > 0.990:
                self.epsilon_decay -= 0.002
                optimizations.append(f"Reduced epsilon_decay to {self.epsilon_decay:.4f}")
            if self.learning_rate > 0.0005:
                self.learning_rate -= 0.0001
                optimizations.append(f"Reduced learning_rate to {self.learning_rate:.6f}")
        else:
            if self.epsilon_decay < 0.999:
                self.epsilon_decay += 0.001
                optimizations.append(f"Increased epsilon_decay to {self.epsilon_decay:.4f}")
            if self.learning_rate < 0.001:
                self.learning_rate += 0.00005
                optimizations.append(f"Increased learning_rate to {self.learning_rate:.6f}")
        return optimizations

# Simulated IoT environment
class CarbonEmissionsEnv:
    def __init__(self):
        self.state = None

    def reset(self):
        self.state = np.random.rand(4)
        return self.state

    def step(self, action):
        emission_reduction = action * 0.1
        cost_penalty = (3 - action) * 0.05
        efficiency_bonus = 0.2 if action == 2 else 0.0
        reward = emission_reduction - cost_penalty + efficiency_bonus
        next_state = np.random.rand(4)
        done = np.random.rand() < 0.1
        return next_state, reward, done, {}

# Main execution
if __name__ == "__main__":
    print(f"Retrieving data from Elasticsearch index '{index_name}'...")
    data = retrieve_data(es, index_name)

    state_size = 4
    action_size = 3
    env = CarbonEmissionsEnv()
    agent = DQNAgent(state_size, action_size)

    episodes = 100
    batch_size = 32
    reward_history = deque(maxlen=10)

    for e in range(episodes):
        state = env.reset()
        state = np.reshape(state, [1, state_size])
        total_reward = 0
        for time in range(500):
            action = agent.act(state)
            next_state, reward, done, _ = env.step(action)
            next_state = np.reshape(next_state, [1, state_size])
            agent.remember(state, action, reward, next_state, done)
            state = next_state
            total_reward += reward
            if done:
                break
        reward_history.append(total_reward)
        optimizations = agent.monitor_and_optimize(reward_history)
        print(f"episode: {e}/{episodes}, score: {time}, total_reward: {total_reward:.2f}")
        if optimizations:
            print("Optimizations triggered:")
            for opt in optimizations:
                print(f"- {opt}")
        agent.replay(batch_size)


Retrieving data from Elasticsearch index 'iot_sensor'...
Retrieved 200 documents from index 'iot_sensor'.


  response = es_client.search(index=index, body=query, size=1000)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


episode: 0/100, score: 0, total_reward: -0.15
Optimizations triggered:
- Increased epsilon_decay to 0.9960
episode: 1/100, score: 14, total_reward: 1.55
Optimizations triggered:
- Reduced epsilon_decay to 0.9940
- Reduced learning_rate to 0.000900
episode: 2/100, score: 3, total_reward: 1.05
Optimizations triggered:
- Reduced epsilon_decay to 0.9920
- Reduced learning_rate to 0.000800
episode: 3/100, score: 11, total_reward: 0.80
Optimizations triggered:
- Reduced epsilon_decay to 0.9900
- Reduced learning_rate to 0.000700
episode: 4/100, score: 12, total_reward: 1.00
Optimizations triggered:
- Reduced learning_rate to 0.000600
episode: 5/100, score: 0, total_reward: -0.15
Optimizations triggered:
- Reduced learning_rate to 0.000500
episode: 6/100, score: 8, total_reward: 1.05
episode: 7/100, score: 4, total_reward: -0.45
Optimizations triggered:
- Increased epsilon_decay to 0.9910
- Increased learning_rate to 0.000550
episode: 8/100, score: 6, total_reward: 0.75
Optimizations triggere

In [48]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.metrics import mean_squared_error, r2_score
from xgboost import XGBRegressor
from transformers import pipeline, GPT2Tokenizer, GPT2LMHeadModel
import joblib

# Load dataset
data_path = "/Users/kdn_aikothalavanya/Desktop/KPMG Projects/carbonfootprint/esg_scores.csv"
data = pd.read_csv(data_path)

# Data preprocessing
data = data[['AQI', 'Environmental_Score', 'Social_Score', 'Governance_Score']].dropna()
X = data[['Environmental_Score', 'Social_Score', 'Governance_Score']]
y = data['AQI']

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Pipeline creation
pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('poly', PolynomialFeatures()),
    ('model', XGBRegressor(objective='reg:squarederror', random_state=42))
])

# Hyperparameter tuning
param_grid = {
    'poly__degree': [1, 2],
    'model__n_estimators': [100, 200],
    'model__max_depth': [3, 5, 7],
    'model__learning_rate': [0.01, 0.1, 0.2]
}

grid_search = GridSearchCV(pipeline, param_grid, cv=5, scoring='neg_mean_squared_error', verbose=2)
grid_search.fit(X_train, y_train)

# Best model
best_model = grid_search.best_estimator_
print("Best parameters:", grid_search.best_params_)

# Evaluation on test set
y_pred = best_model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print("Test MSE:", mse)
print("Test R2 Score:", r2)

# Cross-validation scores
cv_scores = cross_val_score(best_model, X_train, y_train, cv=5, scoring='r2')
print("Cross-validation R2 scores:", cv_scores)
print("Mean CV R2 score:", np.mean(cv_scores))

# Save the scaler
scaler = best_model.named_steps['scaler']
joblib.dump(scaler, "/Users/kdn_aikothalavanya/Desktop/KPMG Projects/carbonfootprint/scaler.pkl")
print("Scaler saved as 'scaler.pkl'")

# Save the PolynomialFeatures transformer
poly_transformer = best_model.named_steps['poly']
joblib.dump(poly_transformer, "/Users/kdn_aikothalavanya/Desktop/KPMG Projects/carbonfootprint/poly_transform.pkl")
print("Polynomial transformer saved as 'poly_transform.pkl'")

# Save the trained XGBoost model
xgb_model = best_model.named_steps['model']
joblib.dump(xgb_model, "/Users/kdn_aikothalavanya/Desktop/KPMG Projects/carbonfootprint/xgb_model.pkl")
print("XGBoost model saved as 'xgb_model.pkl'")

# Load the pre-trained GPT-2 model for ESG suggestions
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
model = GPT2LMHeadModel.from_pretrained("gpt2")

def generate_ml_suggestions(environmental_score, social_score, governance_score):
    """
    Generate suggestions for reducing AQI using a local LLM.
    """
    # Format input as a prompt for the LLM
    input_prompt = (
        f"Environmental Score: {environmental_score}, "
        f"Social Score: {social_score}, "
        f"Governance Score: {governance_score}. "
        f"What actions should be taken to improve air quality?"
    )

    # Tokenize input and generate text
    inputs = tokenizer.encode(input_prompt, return_tensors="pt")
    outputs = model.generate(
        inputs,
        max_length=100,
        num_return_sequences=1,
        no_repeat_ngram_size=2,
        temperature=0.7
    )

    # Decode the output
    suggestion = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return suggestion

# Example usage for generating suggestions
example_esg_scores = {
    'Environmental_Score': 80,
    'Social_Score': 60,
    'Governance_Score': 70
}

suggestion = generate_ml_suggestions(
    environmental_score=example_esg_scores['Environmental_Score'],
    social_score=example_esg_scores['Social_Score'],
    governance_score=example_esg_scores['Governance_Score']
)

print(f"Suggested Action: {suggestion}")


Fitting 5 folds for each of 36 candidates, totalling 180 fits
[CV] END model__learning_rate=0.01, model__max_depth=3, model__n_estimators=100, poly__degree=1; total time=   0.2s
[CV] END model__learning_rate=0.01, model__max_depth=3, model__n_estimators=100, poly__degree=1; total time=   0.1s
[CV] END model__learning_rate=0.01, model__max_depth=3, model__n_estimators=100, poly__degree=1; total time=   0.0s
[CV] END model__learning_rate=0.01, model__max_depth=3, model__n_estimators=100, poly__degree=1; total time=   0.0s
[CV] END model__learning_rate=0.01, model__max_depth=3, model__n_estimators=100, poly__degree=1; total time=   0.0s
[CV] END model__learning_rate=0.01, model__max_depth=3, model__n_estimators=100, poly__degree=2; total time=   0.1s
[CV] END model__learning_rate=0.01, model__max_depth=3, model__n_estimators=100, poly__degree=2; total time=   0.1s
[CV] END model__learning_rate=0.01, model__max_depth=3, model__n_estimators=100, poly__degree=2; total time=   0.1s
[CV] END m

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


Suggested Action: Environmental Score: 80, Social Score: 60, Governance Score: 70. What actions should be taken to improve air quality?

The government has taken steps to reduce air pollution in the country. The government is working to increase the number of air-quality monitoring stations in every city and town.
. . .
 (1) The Government of India has announced that it will increase air monitoring of the air in all cities and towns in India. (2) In the last two years


In [49]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.metrics import mean_squared_error, r2_score
from xgboost import XGBRegressor
from transformers import pipeline, GPT2Tokenizer, GPT2LMHeadModel
import joblib

# Load dataset
data_path = "/Users/kdn_aikothalavanya/Desktop/KPMG Projects/carbonfootprint/esg_scores.csv"
data = pd.read_csv(data_path)

# Data preprocessing
data = data[['AQI', 'Environmental_Score', 'Social_Score', 'Governance_Score']].dropna()
X = data[['Environmental_Score', 'Social_Score', 'Governance_Score']]
y = data['AQI']

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Pipeline creation
pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('poly', PolynomialFeatures()),
    ('model', XGBRegressor(objective='reg:squarederror', random_state=42))
])

# Hyperparameter tuning
param_grid = {
    'poly__degree': [1, 2],
    'model__n_estimators': [100, 200],
    'model__max_depth': [3, 5, 7],
    'model__learning_rate': [0.01, 0.1, 0.2]
}

grid_search = GridSearchCV(pipeline, param_grid, cv=5, scoring='neg_mean_squared_error', verbose=2)
grid_search.fit(X_train, y_train)

# Best model
best_model = grid_search.best_estimator_
print("\nBest parameters:", grid_search.best_params_)

# Evaluation on test set
y_pred = best_model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print("\nTest MSE:", mse)
print("Test R2 Score:", r2)

# Cross-validation scores
cv_scores = cross_val_score(best_model, X_train, y_train, cv=5, scoring='r2')
print("\nCross-validation R2 scores:", cv_scores)
print("Mean CV R2 score:", np.mean(cv_scores))

# Save the scaler
scaler = best_model.named_steps['scaler']
scaler_path = "/Users/kdn_aikothalavanya/Desktop/KPMG Projects/carbonfootprint/scaler.pkl"
joblib.dump(scaler, scaler_path)
print(f"\nScaler saved as '{scaler_path}'")

# Save the PolynomialFeatures transformer
poly_transformer = best_model.named_steps['poly']
poly_path = "/Users/kdn_aikothalavanya/Desktop/KPMG Projects/carbonfootprint/poly_transform.pkl"
joblib.dump(poly_transformer, poly_path)
print(f"Polynomial transformer saved as '{poly_path}'")

# Save the trained XGBoost model
xgb_model = best_model.named_steps['model']
xgb_path = "/Users/kdn_aikothalavanya/Desktop/KPMG Projects/carbonfootprint/xgb_model.pkl"
joblib.dump(xgb_model, xgb_path)
print(f"XGBoost model saved as '{xgb_path}'")

# Load the pre-trained GPT-2 model for ESG suggestions
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
model = GPT2LMHeadModel.from_pretrained("gpt2")

# Ensure proper padding is set
tokenizer.pad_token = tokenizer.eos_token
model.config.pad_token_id = tokenizer.eos_token_id

def generate_ml_suggestions(environmental_score, social_score, governance_score):
    """
    Generate suggestions for improving air quality based on ESG scores using GPT-2.
    """
    input_prompt = (
        f"Environmental Score: {environmental_score}, "
        f"Social Score: {social_score}, "
        f"Governance Score: {governance_score}. "
        f"What actions should be taken to improve air quality?"
    )

    inputs = tokenizer.encode(input_prompt, return_tensors="pt")
    outputs = model.generate(
        inputs,
        max_length=100,
        num_return_sequences=1,
        no_repeat_ngram_size=2,
        temperature=0.7
    )

    suggestion = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return suggestion

# Example usage for generating suggestions
example_esg_scores = {
    'Environmental_Score': 80,
    'Social_Score': 60,
    'Governance_Score': 70
}

suggestion = generate_ml_suggestions(
    environmental_score=example_esg_scores['Environmental_Score'],
    social_score=example_esg_scores['Social_Score'],
    governance_score=example_esg_scores['Governance_Score']
)

print(f"\nSuggested Action: {suggestion}")


Fitting 5 folds for each of 36 candidates, totalling 180 fits
[CV] END model__learning_rate=0.01, model__max_depth=3, model__n_estimators=100, poly__degree=1; total time=   0.1s
[CV] END model__learning_rate=0.01, model__max_depth=3, model__n_estimators=100, poly__degree=1; total time=   0.1s
[CV] END model__learning_rate=0.01, model__max_depth=3, model__n_estimators=100, poly__degree=1; total time=   0.0s
[CV] END model__learning_rate=0.01, model__max_depth=3, model__n_estimators=100, poly__degree=1; total time=   0.0s
[CV] END model__learning_rate=0.01, model__max_depth=3, model__n_estimators=100, poly__degree=1; total time=   0.0s
[CV] END model__learning_rate=0.01, model__max_depth=3, model__n_estimators=100, poly__degree=2; total time=   0.0s
[CV] END model__learning_rate=0.01, model__max_depth=3, model__n_estimators=100, poly__degree=2; total time=   0.0s
[CV] END model__learning_rate=0.01, model__max_depth=3, model__n_estimators=100, poly__degree=2; total time=   0.1s
[CV] END m

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.



Suggested Action: Environmental Score: 80, Social Score: 60, Governance Score: 70. What actions should be taken to improve air quality?

The government has taken steps to reduce air pollution in the country. The government is working to increase the number of air-quality monitoring stations in every city and town.
. . .
 (1) The Government of India has announced that it will increase air monitoring of the air in all cities and towns in India. (2) In the last two years


In [52]:
! pip install textblob


Collecting textblob
  Downloading textblob-0.19.0-py3-none-any.whl.metadata (4.4 kB)
Collecting nltk>=3.9 (from textblob)
  Using cached nltk-3.9.1-py3-none-any.whl.metadata (2.9 kB)
Downloading textblob-0.19.0-py3-none-any.whl (624 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m624.3/624.3 kB[0m [31m6.5 MB/s[0m eta [36m0:00:00[0m
[?25hUsing cached nltk-3.9.1-py3-none-any.whl (1.5 MB)
Installing collected packages: nltk, textblob
Successfully installed nltk-3.9.1 textblob-0.19.0


In [53]:
from textblob import TextBlob
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.metrics import mean_squared_error, r2_score
from xgboost import XGBRegressor
import joblib

# Load dataset
data_path = "/Users/kdn_aikothalavanya/Desktop/KPMG Projects/carbonfootprint/esg_scores.csv"
data = pd.read_csv(data_path)

# Data preprocessing
data = data[['AQI', 'Environmental_Score', 'Social_Score', 'Governance_Score']].dropna()
X = data[['Environmental_Score', 'Social_Score', 'Governance_Score']]
y = data['AQI']

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Pipeline creation
pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('poly', PolynomialFeatures()),
    ('model', XGBRegressor(objective='reg:squarederror', random_state=42))
])

# Hyperparameter tuning
param_grid = {
    'poly__degree': [1, 2],
    'model__n_estimators': [100, 200],
    'model__max_depth': [3, 5, 7],
    'model__learning_rate': [0.01, 0.1, 0.2]
}

grid_search = GridSearchCV(pipeline, param_grid, cv=5, scoring='neg_mean_squared_error', verbose=2)
grid_search.fit(X_train, y_train)

# Best model
best_model = grid_search.best_estimator_

# Sentiment analysis function
def sentiment_analysis(text):
    """
    Perform sentiment analysis on a given text and return the polarity.
    """
    blob = TextBlob(text)
    return blob.sentiment.polarity

# Suggestion generator based on ESG scores and sentiment analysis
def generate_suggestions(environmental_score, social_score, governance_score):
    """
    Generate suggestions based on ESG scores and sentiment polarity.
    """
    input_prompt = (
        f"Environmental Score: {environmental_score}, "
        f"Social Score: {social_score}, "
        f"Governance Score: {governance_score}. "
    )

    # Analyze sentiment
    sentiment = sentiment_analysis(input_prompt)

    # Provide hardcoded suggestions based on sentiment polarity
    if sentiment > 0.5:
        suggestion = "Your ESG scores are strong! Focus on maintaining transparency and leveraging clean energy solutions."
    elif 0 <= sentiment <= 0.5:
        suggestion = (
            "Your ESG scores are moderate. Work on improving governance policies and social initiatives to enhance performance."
        )
    else:
        suggestion = (
            "Your ESG scores need improvement. Consider reducing emissions, implementing fair labor practices, and improving oversight."
        )

    return suggestion

# Example usage for generating suggestions
example_esg_scores = {
    'Environmental_Score': 80,
    'Social_Score': 60,
    'Governance_Score': 70
}

suggestion = generate_suggestions(
    environmental_score=example_esg_scores['Environmental_Score'],
    social_score=example_esg_scores['Social_Score'],
    governance_score=example_esg_scores['Governance_Score']
)

print(f"\nSuggested Action: {suggestion}")


Fitting 5 folds for each of 36 candidates, totalling 180 fits
[CV] END model__learning_rate=0.01, model__max_depth=3, model__n_estimators=100, poly__degree=1; total time=   0.1s
[CV] END model__learning_rate=0.01, model__max_depth=3, model__n_estimators=100, poly__degree=1; total time=   0.1s
[CV] END model__learning_rate=0.01, model__max_depth=3, model__n_estimators=100, poly__degree=1; total time=   0.2s
[CV] END model__learning_rate=0.01, model__max_depth=3, model__n_estimators=100, poly__degree=1; total time=   0.1s
[CV] END model__learning_rate=0.01, model__max_depth=3, model__n_estimators=100, poly__degree=1; total time=   0.1s
[CV] END model__learning_rate=0.01, model__max_depth=3, model__n_estimators=100, poly__degree=2; total time=   0.1s
[CV] END model__learning_rate=0.01, model__max_depth=3, model__n_estimators=100, poly__degree=2; total time=   0.1s
[CV] END model__learning_rate=0.01, model__max_depth=3, model__n_estimators=100, poly__degree=2; total time=   0.1s
[CV] END m

In [54]:
from textblob import TextBlob
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.metrics import mean_squared_error, r2_score
from xgboost import XGBRegressor
from elasticsearch import Elasticsearch
from tensorflow.keras import models, layers
from collections import deque
import random
import tensorflow as tf
import joblib

# Elasticsearch connection
es = Elasticsearch("http://localhost:9200")
index_name = "iot_sensor"

# Function to retrieve IoT data from Elasticsearch
def retrieve_data(es_client, index):
    try:
        query = {"query": {"match_all": {}}}
        response = es_client.search(index=index, body=query, size=1000)
        data = [hit["_source"] for hit in response["hits"]["hits"]]
        print(f"Retrieved {len(data)} documents from index '{index}'.")
        return data
    except Exception as e:
        print(f"Error retrieving data: {e}")
        return []

# Load ESG dataset
data_path = "/Users/kdn_aikothalavanya/Desktop/KPMG Projects/carbonfootprint/esg_scores.csv"
data = pd.read_csv(data_path)

# Data preprocessing
data = data[['AQI', 'Environmental_Score', 'Social_Score', 'Governance_Score']].dropna()
X = data[['Environmental_Score', 'Social_Score', 'Governance_Score']]
y = data['AQI']

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Pipeline creation
pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('poly', PolynomialFeatures()),
    ('model', XGBRegressor(objective='reg:squarederror', random_state=42))
])

# Hyperparameter tuning
param_grid = {
    'poly__degree': [1, 2],
    'model__n_estimators': [100, 200],
    'model__max_depth': [3, 5, 7],
    'model__learning_rate': [0.01, 0.1, 0.2]
}

grid_search = GridSearchCV(pipeline, param_grid, cv=5, scoring='neg_mean_squared_error', verbose=2)
grid_search.fit(X_train, y_train)

# Best model
best_model = grid_search.best_estimator_

# Sentiment analysis function
def sentiment_analysis(text):
    blob = TextBlob(text)
    return blob.sentiment.polarity

# Suggestion generator based on ESG scores and sentiment analysis
def generate_suggestions(environmental_score, social_score, governance_score):
    input_prompt = (
        f"Environmental Score: {environmental_score}, "
        f"Social Score: {social_score}, "
        f"Governance Score: {governance_score}. "
    )

    # Analyze sentiment
    sentiment = sentiment_analysis(input_prompt)

    # Provide hardcoded suggestions based on sentiment polarity
    if sentiment > 0.5:
        suggestion = "Your ESG scores are strong! Focus on maintaining transparency and leveraging clean energy solutions."
    elif 0 <= sentiment <= 0.5:
        suggestion = (
            "Your ESG scores are moderate. Work on improving governance policies and social initiatives to enhance performance."
        )
    else:
        suggestion = (
            "Your ESG scores need improvement. Consider reducing emissions, implementing fair labor practices, and improving oversight."
        )

    return suggestion

# Simulated IoT environment for carbon emissions
class CarbonEmissionsEnv:
    def __init__(self):
        self.state = None

    def reset(self):
        self.state = np.random.rand(4)
        return self.state

    def step(self, action):
        emission_reduction = action * 0.1
        cost_penalty = (3 - action) * 0.05
        efficiency_bonus = 0.2 if action == 2 else 0.0
        reward = emission_reduction - cost_penalty + efficiency_bonus
        next_state = np.random.rand(4)
        done = np.random.rand() < 0.1
        return next_state, reward, done, {}

# DQNAgent definition
class DQNAgent:
    def __init__(self, state_size, action_size):
        self.state_size = state_size
        self.action_size = action_size
        self.memory = deque(maxlen=2000)
        self.gamma = 0.95
        self.epsilon = 1.0
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995
        self.learning_rate = 0.001
        self.model = self._build_model()

    def _build_model(self):
        model = models.Sequential([
            layers.Dense(24, input_dim=self.state_size, activation='relu'),
            layers.Dense(24, activation='relu'),
            layers.Dense(self.action_size, activation='linear')
        ])
        model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=self.learning_rate), loss='mse')
        return model

    def act(self, state):
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)
        q_values = self.model.predict(state, verbose=0)
        return np.argmax(q_values[0])

    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))

    def replay(self, batch_size):
        if len(self.memory) < batch_size:
            return
        minibatch = random.sample(self.memory, batch_size)
        for state, action, reward, next_state, done in minibatch:
            target = reward
            if not done:
                target += self.gamma * np.amax(self.model.predict(next_state, verbose=0)[0])
            target_f = self.model.predict(state, verbose=0)
            target_f[0][action] = target
            self.model.fit(state, target_f, epochs=1, verbose=0)
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

# Main execution
if __name__ == "__main__":
    print(f"Retrieving data from Elasticsearch index '{index_name}'...")
    data = retrieve_data(es, index_name)

    # ESG example usage
    example_esg_scores = {
        'Environmental_Score': 80,
        'Social_Score': 60,
        'Governance_Score': 70
    }
    suggestion = generate_suggestions(
        environmental_score=example_esg_scores['Environmental_Score'],
        social_score=example_esg_scores['Social_Score'],
        governance_score=example_esg_scores['Governance_Score']
    )
    print(f"\nSuggested Action: {suggestion}")

    # Reinforcement learning
    state_size = 4
    action_size = 3
    env = CarbonEmissionsEnv()
    agent = DQNAgent(state_size, action_size)

    episodes = 100
    batch_size = 32

    for e in range(episodes):
        state = env.reset()
        state = np.reshape(state, [1, state_size])
        for time in range(500):
            action = agent.act(state)
            next_state, reward, done, _ = env.step(action)
            next_state = np.reshape(next_state, [1, state_size])
            agent.remember(state, action, reward, next_state, done)
            state = next_state
            if done:
                break
        agent.replay(batch_size)
        print(f"Episode {e+1}/{episodes} completed.")


Fitting 5 folds for each of 36 candidates, totalling 180 fits
[CV] END model__learning_rate=0.01, model__max_depth=3, model__n_estimators=100, poly__degree=1; total time=   0.0s
[CV] END model__learning_rate=0.01, model__max_depth=3, model__n_estimators=100, poly__degree=1; total time=   0.0s
[CV] END model__learning_rate=0.01, model__max_depth=3, model__n_estimators=100, poly__degree=1; total time=   0.0s
[CV] END model__learning_rate=0.01, model__max_depth=3, model__n_estimators=100, poly__degree=1; total time=   0.0s
[CV] END model__learning_rate=0.01, model__max_depth=3, model__n_estimators=100, poly__degree=1; total time=   0.0s
[CV] END model__learning_rate=0.01, model__max_depth=3, model__n_estimators=100, poly__degree=2; total time=   0.0s
[CV] END model__learning_rate=0.01, model__max_depth=3, model__n_estimators=100, poly__degree=2; total time=   0.0s
[CV] END model__learning_rate=0.01, model__max_depth=3, model__n_estimators=100, poly__degree=2; total time=   0.0s
[CV] END m

  response = es_client.search(index=index, body=query, size=1000)


Retrieved 200 documents from index 'iot_sensor'.

Suggested Action: Your ESG scores are moderate. Work on improving governance policies and social initiatives to enhance performance.
Episode 1/100 completed.
Episode 2/100 completed.
Episode 3/100 completed.
Episode 4/100 completed.
Episode 5/100 completed.
Episode 6/100 completed.


KeyboardInterrupt: 

In [56]:
import numpy as np
import random
from collections import deque
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam
from elasticsearch import Elasticsearch


class CarbonEmissionsEnv:
    def __init__(self):
        self.state = np.array([0.5, 0.5, 0.5, 0.5])  
        self.action_space = 3  

    def step(self, action):
        reward = 0
        if action == 0: 
            reward = 10
        elif action == 1:  
            reward = 5
        elif action == 2:  
            reward = -5

        self.state = np.random.rand(4)  
        done = random.random() > 0.95  

        return self.state, reward, done, {}


class DQNAgent:
    def __init__(self, state_size, action_size):
        self.state_size = state_size
        self.action_size = action_size
        self.memory = deque(maxlen=2000)
        self.gamma = 0.95  
        self.epsilon = 1.0  
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995
        self.learning_rate = 0.001
        self.model = self._build_model()

    def _build_model(self):
        model = Sequential()
        model.add(Dense(24, input_dim=self.state_size, activation='relu'))
        model.add(Dense(24, activation='relu'))
        model.add(Dense(self.action_size, activation='linear'))
        model.compile(loss='mse', optimizer=Adam(learning_rate=self.learning_rate))
        return model

    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)
        act_values = self.model.predict(state, verbose=0)
        return np.argmax(act_values[0])

    def replay(self, batch_size):
        minibatch = random.sample(self.memory, batch_size)
        for state, action, reward, next_state, done in minibatch:
            target = reward
            if not done:
                target += self.gamma * np.amax(self.model.predict(next_state, verbose=0)[0])
            target_f = self.model.predict(state, verbose=0)
            target_f[0][action] = target
            self.model.fit(state, target_f, epochs=1, verbose=0)
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay


def retrieve_data(es, index_name):
    try:
        results = es.search(index=index_name, body={"query": {"match_all": {}}}, size=1000)
        return [hit["_source"] for hit in results["hits"]["hits"]]
    except Exception as e:
        print(f"Error retrieving data: {e}")
        return []


def generate_suggestions(environmental_score, social_score, governance_score):
    avg_score = (environmental_score + social_score + governance_score) / 3
    if avg_score > 80:
        return "Excellent ESG performance. Focus on maintaining leadership."
    elif avg_score > 50:
        return "Good ESG performance. Consider investing in sustainability."
    else:
        return "ESG performance needs improvement. Prioritize actionable goals."


if __name__ == "__main__":
    
    es = Elasticsearch(["http://localhost:9200"])
    index_name = "iot_sensor"

    
    print(f"Retrieving data from Elasticsearch index '{index_name}'...")
    iot_data = retrieve_data(es, index_name)

    if iot_data:
        
        states = []
        for doc in iot_data:
            state = [
                doc.get('sensor1', 0.0),  
                doc.get('sensor2', 0.0),
                doc.get('sensor3', 0.0),
                doc.get('sensor4', 0.0)
            ]
            states.append(state)

        
        states = np.array(states)
        states = (states - np.min(states, axis=0)) / (np.max(states, axis=0) - np.min(states, axis=0))

        
        state_size = states.shape[1]
        action_size = 3  
        env = CarbonEmissionsEnv()
        agent = DQNAgent(state_size, action_size)

       
        for idx, state in enumerate(states):
            state = np.reshape(state, [1, state_size])
            action = agent.act(state)  
            next_state, reward, done, _ = env.step(action)
            next_state = np.reshape(next_state, [1, state_size])

           
            if action == 0:
                suggestion = "Reduce emissions by optimizing processes and investing in cleaner technology."
            elif action == 1:
                suggestion = "Maintain current efforts, but monitor for opportunities to improve efficiency."
            elif action == 2:
                suggestion = "Increase efforts towards emission reduction to meet sustainability targets."

            print(f"State {idx+1}: Action Taken: {action}, Reward: {reward:.2f}, Suggestion: {suggestion}")

            
            agent.remember(state, action, reward, next_state, done)
            if len(agent.memory) >= 32:  
                agent.replay(32)
    else:
        print("No data retrieved from Elasticsearch. Ensure the index contains documents.")

    
    example_esg_scores = {
        'Environmental_Score': 80,
        'Social_Score': 60,
        'Governance_Score': 70
    }
    suggestion = generate_suggestions(
        environmental_score=example_esg_scores['Environmental_Score'],
        social_score=example_esg_scores['Social_Score'],
        governance_score=example_esg_scores['Governance_Score']
    )
    print(f"\nSuggested Action for ESG Scores: {suggestion}")


Retrieving data from Elasticsearch index 'iot_sensor'...


  results = es.search(index=index_name, body={"query": {"match_all": {}}}, size=1000)
  states = (states - np.min(states, axis=0)) / (np.max(states, axis=0) - np.min(states, axis=0))


State 1: Action Taken: 1, Reward: 5.00, Suggestion: Maintain current efforts, but monitor for opportunities to improve efficiency.
State 2: Action Taken: 1, Reward: 5.00, Suggestion: Maintain current efforts, but monitor for opportunities to improve efficiency.
State 3: Action Taken: 2, Reward: -5.00, Suggestion: Increase efforts towards emission reduction to meet sustainability targets.
State 4: Action Taken: 1, Reward: 5.00, Suggestion: Maintain current efforts, but monitor for opportunities to improve efficiency.
State 5: Action Taken: 2, Reward: -5.00, Suggestion: Increase efforts towards emission reduction to meet sustainability targets.
State 6: Action Taken: 2, Reward: -5.00, Suggestion: Increase efforts towards emission reduction to meet sustainability targets.
State 7: Action Taken: 0, Reward: 10.00, Suggestion: Reduce emissions by optimizing processes and investing in cleaner technology.
State 8: Action Taken: 2, Reward: -5.00, Suggestion: Increase efforts towards emission re

In [57]:
! pip install textblob pandas numpy scikit-learn xgboost elasticsearch tensorflow web3


Collecting urllib3<2,>=1.21.1 (from elasticsearch)
  Using cached urllib3-1.26.20-py2.py3-none-any.whl.metadata (50 kB)
INFO: pip is looking at multiple versions of types-requests to determine which version is compatible with other requirements. This could take a while.
Collecting types-requests>=2.0.0 (from web3)
  Downloading types_requests-2.32.0.20250301-py3-none-any.whl.metadata (2.3 kB)
  Downloading types_requests-2.32.0.20241016-py3-none-any.whl.metadata (1.9 kB)
  Downloading types_requests-2.32.0.20240914-py3-none-any.whl.metadata (1.9 kB)
  Downloading types_requests-2.32.0.20240907-py3-none-any.whl.metadata (1.9 kB)
  Downloading types_requests-2.32.0.20240905-py3-none-any.whl.metadata (1.9 kB)
  Downloading types_requests-2.32.0.20240712-py3-none-any.whl.metadata (1.9 kB)
  Downloading types_requests-2.32.0.20240622-py3-none-any.whl.metadata (1.8 kB)
INFO: pip is still looking at multiple versions of types-requests to determine which version is compatible with other requir

In [73]:
! pip install web3 --upgrade




In [60]:
! pip install web3




In [61]:
from web3 import Web3

# Generate a new account
account = Web3().eth.account.create()

# Get the public wallet address and private key
wallet_address = account.address
private_key = account.key.hex()

# Display the wallet details
print(f"Wallet Address: {wallet_address}")
print(f"Private Key: {private_key}")

# Store these securely


Wallet Address: 0x0BB8E0d56C43CEda853F0FcdbEe5FdC1076d9f41
Private Key: 237bada3d4eb8bebc86d9c06b2201fbcb7e14f04bcfc067a59c8435ddd9fc2fc


In [84]:
import json
import random
import numpy as np
import elasticsearch
from web3 import Web3
from collections import deque

# Blockchain Configuration
INFURA_URL = "https://rpc.ankr.com/bsc/48b208604a24d6b44887751595a5f16a82814b1aecdb818edfd89f7a12da96b8"
web3 = Web3(Web3.HTTPProvider(INFURA_URL))
CONTRACT_ADDRESS = "0x1a4c3b6c7a08c0989Ff780514568ed85786aa659"
WALLET_ADDRESS = "0x1a4c3b6c7a08c0989Ff780514568ed85786aa659"
PRIVATE_KEY = "4c8d67458d12d22d8aebf2f8d13cba69afe3c01f77ab808e37ecd319b13b0bb9"

# Load Contract ABI
with open("CarbonCreditTokenABI.json", "r") as abi_file:
    CONTRACT_ABI = json.load(abi_file)
contract = web3.eth.contract(address=CONTRACT_ADDRESS, abi=CONTRACT_ABI)

# Elasticsearch Configuration
ES_HOST = "http://localhost:9200"
es_client = elasticsearch.Elasticsearch([ES_HOST])
INDEX_NAME = "iot_sensor"

# Define Deep Q-Learning Parameters
action_space = ["mint", "burn", "transfer"]
state_size = 4  # Example state size
action_size = len(action_space)
epsilon = 1.0
epsilon_min = 0.1
epsilon_decay = 0.995
learning_rate = 0.001
gamma = 0.95
episodes = 100
batch_size = 32
memory = deque(maxlen=2000)

# Helper Functions
def get_wallet_balance():
    try:
        balance = web3.eth.get_balance(WALLET_ADDRESS)
        eth_balance = web3.from_wei(balance, 'ether')  # Use the correct method
        print(f"Wallet Balance: {eth_balance} ETH")
        return eth_balance
    except Exception as e:
        print(f"Error retrieving wallet balance: {e}")
        return None

def send_transaction(receiver_address, amount_eth):
    try:
        amount_wei = Web3.toWei(amount_eth, 'ether')
        transaction = {
            'to': receiver_address,
            'value': amount_wei,
            'gas': 21000,
            'gasPrice': Web3.toWei('50', 'gwei'),
            'nonce': web3.eth.get_transaction_count(WALLET_ADDRESS),
        }
        signed_tx = web3.eth.account.sign_transaction(transaction, private_key=PRIVATE_KEY)
        txn_hash = web3.eth.send_raw_transaction(signed_tx.rawTransaction)
        print(f"Transaction sent! Hash: {web3.toHex(txn_hash)}")
        receipt = web3.eth.wait_for_transaction_receipt(txn_hash)
        print("Transaction confirmed.")
        return receipt
    except Exception as e:
        print(f"Error sending transaction: {e}")
        return None

def mint_tokens(to_address, amount):
    try:
        mint_tx = contract.functions.mint(to_address, amount).build_transaction({
            'from': WALLET_ADDRESS,
            'nonce': web3.eth.get_transaction_count(WALLET_ADDRESS),
            'gas': 2000000,
            'gasPrice': Web3.toWei('50', 'gwei'),
        })
        signed_tx = web3.eth.account.sign_transaction(mint_tx, private_key=PRIVATE_KEY)
        txn_hash = web3.eth.send_raw_transaction(signed_tx.rawTransaction)
        print(f"Mint transaction sent! Hash: {web3.toHex(txn_hash)}")
        receipt = web3.eth.wait_for_transaction_receipt(txn_hash)
        print("Mint transaction confirmed.")
        return receipt
    except Exception as e:
        print(f"Error minting tokens: {e}")
        return None

def query_elasticsearch():
    try:
        query = {"match_all": {}}
        response = es_client.search(index=INDEX_NAME, body={"query": query}, size=1000)
        hits = response["hits"]["hits"]
        data = [hit["_source"] for hit in hits]
        return data
    except Exception as e:
        print(f"Error querying Elasticsearch: {e}")
        return []

def replay(memory, model):
    if len(memory) < batch_size:
        return
    batch = random.sample(memory, batch_size)
    for state, action, reward, next_state, done in batch:
        target = reward
        if not done:
            target += gamma * np.amax(model.predict(next_state)[0])
        target_f = model.predict(state)
        target_f[0][action] = target
        model.fit(state, target_f, epochs=1, verbose=0)

def deep_q_learning():
    global epsilon
    for episode in range(episodes):
        state = np.random.rand(1, state_size)  # Replace with actual initial state
        done = False
        total_reward = 0

        while not done:
            if np.random.rand() <= epsilon:
                action = random.randrange(action_size)
            else:
                action = np.argmax(model.predict(state)[0])

            next_state = np.random.rand(1, state_size)  # Replace with actual state transition
            reward = random.randint(0, 10)  # Replace with actual reward calculation
            done = random.choice([True, False])  # Replace with actual terminal state condition

            memory.append((state, action, reward, next_state, done))
            state = next_state
            total_reward += reward

            if done:
                print(f"Episode {episode+1}/{episodes} - Total Reward: {total_reward}")
                break

        replay(memory, model)

        if epsilon > epsilon_min:
            epsilon *= epsilon_decay

# Placeholder for DQN model
class DummyModel:
    def predict(self, state):
        return np.random.rand(1, action_size)

    def fit(self, state, target, epochs, verbose):
        pass

model = DummyModel()

def main():
    print("Starting Deep Q-Learning with Blockchain and Elasticsearch integration")
    wallet_balance = get_wallet_balance()
    if wallet_balance:
        print(f"Wallet balance: {wallet_balance} ETH")

    data = query_elasticsearch()
    print(f"Queried {len(data)} records from Elasticsearch")

    deep_q_learning()

if __name__ == "__main__":
    main()


Starting Deep Q-Learning with Blockchain and Elasticsearch integration
Wallet Balance: 0 ETH


  response = es_client.search(index=INDEX_NAME, body={"query": query}, size=1000)


Queried 200 records from Elasticsearch
Episode 1/100 - Total Reward: 6
Episode 2/100 - Total Reward: 7
Episode 3/100 - Total Reward: 13
Episode 4/100 - Total Reward: 0
Episode 5/100 - Total Reward: 18
Episode 6/100 - Total Reward: 5
Episode 7/100 - Total Reward: 10
Episode 8/100 - Total Reward: 2
Episode 9/100 - Total Reward: 9
Episode 10/100 - Total Reward: 7
Episode 11/100 - Total Reward: 13
Episode 12/100 - Total Reward: 13
Episode 13/100 - Total Reward: 6
Episode 14/100 - Total Reward: 0
Episode 15/100 - Total Reward: 4
Episode 16/100 - Total Reward: 27
Episode 17/100 - Total Reward: 36
Episode 18/100 - Total Reward: 22
Episode 19/100 - Total Reward: 1
Episode 20/100 - Total Reward: 15
Episode 21/100 - Total Reward: 8
Episode 22/100 - Total Reward: 12
Episode 23/100 - Total Reward: 9
Episode 24/100 - Total Reward: 10
Episode 25/100 - Total Reward: 9
Episode 26/100 - Total Reward: 6
Episode 27/100 - Total Reward: 14
Episode 28/100 - Total Reward: 7
Episode 29/100 - Total Reward: 13


In [None]:
import json
import random
import numpy as np
import elasticsearch
from web3 import Web3
from collections import deque
from textblob import TextBlob

INFURA_URL = "https://rpc.ankr.com/bsc/48b208604a24d6b44887751595a5f16a82814b1aecdb818edfd89f7a12da96b8"
web3 = Web3(Web3.HTTPProvider(INFURA_URL))
CONTRACT_ADDRESS = "0x1a4c3b6c7a08c0989Ff780514568ed85786aa659"
WALLET_ADDRESS = "0x1a4c3b6c7a08c0989Ff780514568ed85786aa659"
PRIVATE_KEY = "4c8d67458d12d22d8aebf2f8d13cba69afe3c01f77ab808e37ecd319b13b0bb9"

with open("CarbonCreditTokenABI.json", "r") as abi_file:
    CONTRACT_ABI = json.load(abi_file)
contract = web3.eth.contract(address=CONTRACT_ADDRESS, abi=CONTRACT_ABI)

ES_HOST = "http://localhost:9200"
es_client = elasticsearch.Elasticsearch([ES_HOST])
INDEX_NAME = "iot_sensor"


action_space = ["mint", "burn", "transfer"]
state_size = 4  # Example state size
action_size = len(action_space)
epsilon = 1.0
epsilon_min = 0.1
epsilon_decay = 0.995
learning_rate = 0.001
gamma = 0.95
episodes = 100
batch_size = 32
memory = deque(maxlen=2000)

def sentiment_analysis(text):
    blob = TextBlob(text)
    return blob.sentiment.polarity


def generate_suggestions(environmental_score, social_score, governance_score):
    input_prompt = (
        f"Environmental Score: {environmental_score}, "
        f"Social Score: {social_score}, "
        f"Governance Score: {governance_score}."
    )
    sentiment = sentiment_analysis(input_prompt)
    if sentiment > 0.5:
        return "Strong ESG scores! Focus on maintaining transparency and leveraging clean energy solutions."
    elif 0 <= sentiment <= 0.5:
        return "Moderate ESG scores. Improve governance policies and social initiatives."
    else:
        return "Low ESG scores. Focus on emission reductions, fair labor practices, and oversight."

def get_wallet_balance():
    try:
        balance = web3.eth.get_balance(WALLET_ADDRESS)
        eth_balance = web3.from_wei(balance, 'ether')
        print(f"Wallet Balance: {eth_balance} ETH")
        return eth_balance
    except Exception as e:
        print(f"Error retrieving wallet balance: {e}")
        return None

def mint_tokens(to_address, amount):
    try:
        mint_tx = contract.functions.mint(to_address, amount).build_transaction({
            'from': WALLET_ADDRESS,
            'nonce': web3.eth.get_transaction_count(WALLET_ADDRESS),
            'gas': 2000000,
            'gasPrice': Web3.toWei('50', 'gwei'),
        })
        signed_tx = web3.eth.account.sign_transaction(mint_tx, private_key=PRIVATE_KEY)
        txn_hash = web3.eth.send_raw_transaction(signed_tx.rawTransaction)
        print(f"Mint transaction sent! Hash: {web3.toHex(txn_hash)}")
        receipt = web3.eth.wait_for_transaction_receipt(txn_hash)
        print("Mint transaction confirmed.")
        return receipt
    except Exception as e:
        print(f"Error minting tokens: {e}")
        return None


def query_elasticsearch():
    try:
        query = {"match_all": {}}
        response = es_client.search(index=INDEX_NAME, body={"query": query}, size=1000)
        hits = response["hits"]["hits"]
        data = [hit["_source"] for hit in hits]
        return data
    except Exception as e:
        print(f"Error querying Elasticsearch: {e}")
        return []


class DummyModel:
    def predict(self, state):
        return np.random.rand(1, action_size)

    def fit(self, state, target, epochs, verbose):
        pass

model = DummyModel()

# Deep Q-Learning
def replay(memory, model):
    if len(memory) < batch_size:
        return
    batch = random.sample(memory, batch_size)
    for state, action, reward, next_state, done in batch:
        target = reward
        if not done:
            target += gamma * np.amax(model.predict(next_state)[0])
        target_f = model.predict(state)
        target_f[0][action] = target
        model.fit(state, target_f, epochs=1, verbose=0)

def deep_q_learning():
    global epsilon
    for episode in range(episodes):
        state = np.random.rand(1, state_size)  
        done = False
        total_reward = 0

        while not done:
            if np.random.rand() <= epsilon:
                action = random.randrange(action_size)
            else:
                action = np.argmax(model.predict(state)[0])

            next_state = np.random.rand(1, state_size)  
            reward = random.randint(0, 10)  
            done = random.choice([True, False])  

            memory.append((state, action, reward, next_state, done))
            state = next_state
            total_reward += reward

            if done:
                print(f"Episode {episode+1}/{episodes} - Total Reward: {total_reward}")
                break

        replay(memory, model)

        if epsilon > epsilon_min:
            epsilon *= epsilon_decay

def main():
    print("Starting Deep Q-Learning with Blockchain and Elasticsearch integration")
    
    
    environmental_score = 75
    social_score = 60
    governance_score = 65
    suggestions = generate_suggestions(environmental_score, social_score, governance_score)
    print("ESG Suggestions:", suggestions)

  
    wallet_balance = get_wallet_balance()
    if wallet_balance:
        print(f"Wallet balance: {wallet_balance} ETH")

    
    data = query_elasticsearch()
    print(f"Queried {len(data)} records from Elasticsearch")

    
    deep_q_learning()

if __name__ == "__main__":
    main()


Starting Deep Q-Learning with Blockchain and Elasticsearch integration
ESG Suggestions: Moderate ESG scores. Improve governance policies and social initiatives.
Wallet Balance: 0 ETH
Queried 200 records from Elasticsearch
Episode 1/100 - Total Reward: 8
Episode 2/100 - Total Reward: 8
Episode 3/100 - Total Reward: 7
Episode 4/100 - Total Reward: 17
Episode 5/100 - Total Reward: 7
Episode 6/100 - Total Reward: 5
Episode 7/100 - Total Reward: 3
Episode 8/100 - Total Reward: 26
Episode 9/100 - Total Reward: 5
Episode 10/100 - Total Reward: 4
Episode 11/100 - Total Reward: 1
Episode 12/100 - Total Reward: 6
Episode 13/100 - Total Reward: 3
Episode 14/100 - Total Reward: 13
Episode 15/100 - Total Reward: 2
Episode 16/100 - Total Reward: 10
Episode 17/100 - Total Reward: 3
Episode 18/100 - Total Reward: 20
Episode 19/100 - Total Reward: 6
Episode 20/100 - Total Reward: 21
Episode 21/100 - Total Reward: 23
Episode 22/100 - Total Reward: 6
Episode 23/100 - Total Reward: 17
Episode 24/100 - Tot

  response = es_client.search(index=INDEX_NAME, body={"query": query}, size=1000)
