In [19]:
import pandas as pd
import numpy as np
import plotly.express as px
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error,r2_score
from statsmodels.tsa.arima.model import ARIMA
import matplotlib.pyplot as plt


In [5]:
# Load the dataset
df = pd.read_csv('data/dailypowerrenewable.csv')  # Replace with your dataset path
# Inspect the first few rows and basic info
print(df.head())
print(df.info())


         source     NR     WR     SR     ER   NER  All India        date
0         Hydro  139.0   43.0   72.0   30.0   7.0        292  2013-03-31
1         Total  675.0  820.0  697.0  306.0  28.0       2526  2013-03-31
2  Wind Gen(MU)    2.0   19.0   13.0    0.0   0.0         34  2013-03-31
3         Hydro  137.0   43.0   83.0   32.0   5.0        300  2013-04-01
4         Total  683.0  841.0  706.0  316.0  29.0       2575  2013-04-01
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 18251 entries, 0 to 18250
Data columns (total 8 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   source     18251 non-null  object 
 1   NR         18251 non-null  float64
 2   WR         18251 non-null  float64
 3   SR         18251 non-null  float64
 4   ER         18251 non-null  float64
 5   NER        18251 non-null  float64
 6   All India  18251 non-null  int64  
 7   date       18251 non-null  object 
dtypes: float64(5), int64(1), object(2)
memory usage: 

In [6]:
# Check for null values
print(df.isnull().sum())

# Drop rows with null values (or fill them if needed)
df.dropna(inplace=True)  # Change to fillna() to fill instead of drop

# Fix the date column
df['date'] = pd.to_datetime(df['date'], errors='coerce')  # Convert to datetime
df.dropna(subset=['date'], inplace=True)  # Drop rows where date conversion failed


source       0
NR           0
WR           0
SR           0
ER           0
NER          0
All India    0
date         0
dtype: int64


In [8]:
# Convert relevant columns to numeric
columns_to_convert = ['NR', 'WR', 'SR', 'ER', 'NER', 'All India']
df[columns_to_convert] = df[columns_to_convert].apply(pd.to_numeric, errors='coerce')

# Create new features (year, month) if useful for your analysis
df['year'] = df['date'].dt.year
df['month'] = df['date'].dt.month


In [14]:
# Step 7: Visualization of Historical Data
import plotly.io as pio
pio.renderers.default = 'notebook'
pio.renderers.default = 'iframe'

# Create a time series line plot of energy generation
fig = px.line(df_grouped, x='date', y='All India', title='All India Renewable Energy Generation Over Time')

# Show the figure
fig.show()



In [16]:
# Step 3: Prepare the features and target
# Using all columns except 'date' as features
X = df_grouped.drop(columns=['date', 'All India'])  # Drop 'date' and the target variable
y = df_grouped['All India']  # Target variable

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [17]:
# Step 4: Train the Random Forest model
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)


In [20]:
# Step 5: Evaluate the model
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f'Mean Squared Error: {mse}')
print(f'R^2 Score: {r2}')


Mean Squared Error: 3620.407266094421
R^2 Score: 0.9993692793991901


In [21]:
# Example future input based on last known values
# Create a DataFrame for future dates; here we assume we know the values for the features
# Adjust this based on your actual feature set.
future_input = pd.DataFrame({
    'NR': [100],  # Replace with expected future values
    'WR': [50],
    'SR': [70],
    'ER': [20],
    'NER': [5],
})

# Make a prediction
future_predictions = model.predict(future_input)

print(f'Future Predictions: {future_predictions}')


Future Predictions: [574.51]


In [24]:
import plotly.graph_objects as go

# Create a line chart using Plotly
fig = go.Figure()

# Add actual data points (if available)
fig.add_trace(go.Scatter(
    x=years_extended[:-2],  # Assuming past years
    y=y_pred,  # Replace y_pred with actual historical data if available
    mode='lines+markers',
    name='Historical Data'
))

# Add predictions for future years
fig.add_trace(go.Scatter(
    x=years_extended[-2:],  # Only future years (2024, 2025)
    y=future_predictions,  # Predicted values for future years
    mode='lines+markers',
    name='Predictions'
))

# Update layout
fig.update_layout(
    title='Renewable Energy Generation Predictions',
    xaxis_title='Year',
    yaxis_title='Energy Generation (All India)',
    legend_title='Legend',
    template='plotly',
    showlegend=True
)

# Show the figure
fig.show()


In [27]:
import pandas as pd
import numpy as np
import dash
from dash import dcc, html
from dash.dependencies import Input, Output
import plotly.graph_objects as go
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
import warnings

# Suppress warnings
warnings.filterwarnings("ignore")

# Load your dataset
data = pd.read_csv('data/dailypowerrenewable.csv')

# Data cleaning
data['date'] = pd.to_datetime(data['date'], errors='coerce')
data.dropna(inplace=True)

# Ensure that the 'All India' column is numeric
data['All India'] = pd.to_numeric(data['All India'], errors='coerce')

# Extract year from the date for analysis
data['year'] = data['date'].dt.year

# Aggregate data by year
annual_data = data.groupby('year')['All India'].sum().reset_index()

# Prepare features and target for prediction
X = annual_data['year'].values.reshape(-1, 1)
y = annual_data['All India'].values

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train the model
model = LinearRegression()
model.fit(X_train, y_train)

# Make predictions
predictions = model.predict(X)

# Extend years for future predictions
future_years = np.array([[2024], [2025]])
future_predictions = model.predict(future_years)

# Combine actual and predicted values for visualization
years = np.append(X, future_years)
all_values = np.append(y, future_predictions)

# Initialize the Dash app
app = dash.Dash(__name__)

app.layout = html.Div([
    html.H1("Renewable Energy Generation Predictions"),
    
    dcc.Graph(id='prediction-graph'),

    # Dropdown for selecting data type
    dcc.Dropdown(
        id='data-type',
        options=[
            {'label': 'Actual Values', 'value': 'actual'},
            {'label': 'Predicted Values', 'value': 'predicted'}
        ],
        value='actual'
    )
])

@app.callback(
    Output('prediction-graph', 'figure'),
    [Input('data-type', 'value')]
)
def update_graph(selected_data):
    # Create the figure for the selected data
    fig = go.Figure()

    # Add actual values
    fig.add_trace(go.Scatter(x=annual_data['year'], y=annual_data['All India'], mode='lines+markers', name='Actual Values'))

    # Add predictions for future years
    fig.add_trace(go.Scatter(x=[2024, 2025], y=future_predictions, mode='lines+markers', name='Predicted Values', line=dict(dash='dash')))

    # Update layout
    fig.update_layout(
        title='Renewable Energy Generation Predictions',
        xaxis_title='Year',
        yaxis_title='Energy Generation (MU)',
        template='plotly',
        legend=dict(x=0, y=1)
    )

    return fig

# Run the app
if __name__ == '__main__':
    app.run_server(debug=True)


In [26]:
from bokeh.plotting import figure, show
from bokeh.models import ColumnDataSource, Select
from bokeh.io import curdoc
from bokeh.layouts import column

# Sample data
years = list(range(2013, 2026))
actual_values = [292, 2526, 2558, 2551, 2586, 2600, 2620, 2650, 2680, 2700, 2750, 2800, 2850]
future_predictions = [2900, 2950]

# Create a ColumnDataSource
source = ColumnDataSource(data=dict(years=years[:-2], values=actual_values[:-2]))

# Create a figure
p = figure(title="Renewable Energy Generation Predictions", x_axis_label='Year', y_axis_label='Energy Generation (MU)')
p.line('years', 'values', source=source, line_width=2, color='blue', legend_label='Actual Values')

# Callback function to update the plot
def update_plot(attr, old, new):
    if new == 'Predicted':
        source.data = dict(years=[2024, 2025], values=future_predictions)
        p.title.text = "Predicted Renewable Energy Generation"
    else:
        source.data = dict(years=years[:-2], values=actual_values[:-2])
        p.title.text = "Actual Renewable Energy Generation"

# Dropdown menu for user selection
select = Select(title="Select Data Type:", value="Actual", options=["Actual", "Predicted"])
select.on_change('value', update_plot)

# Layout
layout = column(select, p)

# Add the layout to the current document
curdoc().add_root(layout)

# Show the plot
show(layout)


You are generating standalone HTML/JS output, but trying to use real Python
callbacks (i.e. with on_change or on_event). This combination cannot work.

Only JavaScript callbacks may be used with standalone output. For more
information on JavaScript callbacks with Bokeh, see:

    https://docs.bokeh.org/en/latest/docs/user_guide/interaction/js_callbacks.html

Alternatively, to use real Python callbacks, a Bokeh server application may
be used. For more information on building and running Bokeh applications, see:

    https://docs.bokeh.org/en/latest/docs/user_guide/server.html

