**OOP Assignment one
John Tebasiima
B35101
Data Science & Analytics**

Importing relevant modules for the assignment

In [None]:
import numpy as np
import statistics as stats
import matplotlib.pyplot as plt
from datetime import date, datetime, timedelta

# Some optional imports used for solving linear systems
try:
    from scipy import linalg
except Exception:
    linalg = None

# For reproducible plotting in notebooks
%matplotlib inline


**Question one: UBOS Multi-District Population & Growth Forecast**

In [None]:
# 1.1 Population series for each district (ten observations apiece), held as NumPy arrays.
Kampala = np.array(
    [1200, 1250, 1300, 1350, 1420, 1500, 1580, 1650, 1720, 1800]
)
Wakiso = np.array(
    [950, 1000, 1070, 1150, 1220, 1300, 1390, 1480, 1570, 1670]
)
Gulu = np.array(
    [320, 330, 345, 360, 375, 390, 410, 430, 455, 480]
)

# Bare tuple so the notebook echoes the three arrays.
Kampala, Wakiso, Gulu


# 1.2 Use the statistics module to compute mean, median, variance, and standard deviation for each district.
def describe(arr):
    """Summarise a numeric series with the standard-library ``statistics`` module.

    The values are converted to native Python floats before any statistic is
    computed. ``statistics`` derives its result type from the input elements,
    so NumPy integer scalars cause fractional results to be cast back to a
    NumPy integer (the mean of ``[1, 2, 4]`` would come back as ``2``).

    Args:
        arr: Non-empty one-dimensional sequence or NumPy array of real numbers.

    Returns:
        dict with the keys ``'mean'``, ``'median'``, ``'variance'``
        (population variance) and ``'stdev'`` (population standard
        deviation); every value is a Python float.

    Raises:
        statistics.StatisticsError: If ``arr`` is empty.
    """
    values = [float(v) for v in arr]
    # A single observation has no spread; report 0.0 explicitly.
    has_spread = len(values) > 1
    return {
        'mean': stats.mean(values),
        'median': stats.median(values),
        'variance': stats.pvariance(values) if has_spread else 0.0,  # population variance
        'stdev': stats.pstdev(values) if has_spread else 0.0,
    }

# One summary per district, in the order Kampala, Wakiso, Gulu.
desc_k, desc_w, desc_g = (
    describe(district) for district in (Kampala, Wakiso, Gulu)
)

# Bare tuple so the notebook echoes the three summaries.
desc_k, desc_w, desc_g


# 1.3 Generate a Fibonacci sequence of length 5 to project the next 5 years of growth.

# Generate Fibonacci numbers (length 5)
def fib(n):
    """Return the first ``n`` Fibonacci numbers as a list, starting 0, 1, 1, 2.

    A non-positive ``n`` produces an empty list.
    """
    if n <= 0:
        return []
    terms = []
    current, following = 0, 1
    for _ in range(n):
        terms.append(current)
        current, following = following, current + following
    return terms


# Five terms are generated for the five-step projection; the bare name
# echoes them in the notebook.
fib5 = fib(5)
fib5

# A simple projection: start from the last observed population and add the scaled Fibonacci numbers cumulatively
def project_population(actual, fib_seq, scale=1.0):
    """Extend a population series with cumulative, scaled Fibonacci increments.

    Starting from the last entry of ``actual``, each projected value is the
    previous value plus ``int(round(f * scale))`` for the next term ``f`` of
    ``fib_seq``.

    Args:
        actual: Observed series; only its last element is read.
        fib_seq: Iterable of Fibonacci terms, one per projected step.
        scale: Multiplier applied to each term before rounding to an integer.

    Returns:
        NumPy array holding one projected value per element of ``fib_seq``.
    """
    steps = [int(round(term * scale)) for term in fib_seq]
    running_total = actual[-1]
    forecast = []
    for step in steps:
        # Rebind rather than update in place so ``actual`` is never modified.
        running_total = running_total + step
        forecast.append(running_total)
    return np.array(forecast)


# Each district gets its own multiplier for the Fibonacci increments.
proj_k, proj_w, proj_g = (
    project_population(series, fib5, scale=factor)
    for series, factor in ((Kampala, 10), (Wakiso, 8), (Gulu, 3))
)

# Bare tuple so the notebook echoes the three projections.
proj_k, proj_w, proj_g


# 1.4 Compare the variance of actual vs projected data.


def variance_compare(actual, projected):
    """Compare the population variance of an observed and a projected series.

    Both inputs are converted to native Python floats first. ``statistics``
    derives its result type from the data, so NumPy integer scalars would
    make it cast a fractional variance back to a NumPy integer.

    Args:
        actual: Non-empty sequence or NumPy array of observed values.
        projected: Non-empty sequence or NumPy array of projected values.

    Returns:
        Tuple ``(var_actual, var_projected, var_projected - var_actual)`` of
        Python floats.

    Raises:
        statistics.StatisticsError: If either series is empty.
    """
    var_actual = stats.pvariance([float(v) for v in actual])
    var_proj = stats.pvariance([float(v) for v in projected])
    return var_actual, var_proj, var_proj - var_actual

# Pair each observed series with its projection and compare their variances.
var_comp_k, var_comp_w, var_comp_g = (
    variance_compare(observed, projected)
    for observed, projected in (
        (Kampala, proj_k),
        (Wakiso, proj_w),
        (Gulu, proj_g),
    )
)

# Bare tuple so the notebook echoes the three comparisons.
var_comp_k, var_comp_w, var_comp_g


# 1.5 Plot actual vs projected populations using Matplotlib.


# x positions: observations are numbered from 1 and the projections continue
# directly after the last observation.
n_observed = len(Kampala)
years_actual = np.arange(1, n_observed + 1)
years_proj = np.arange(n_observed + 1, n_observed + 1 + len(proj_k))

plt.figure(figsize=(10, 6))
# Draw every district's observed line immediately followed by its projection,
# keeping the original plotting order (and therefore colours and legend order).
for actual_label, observed_series, projected_label, projected_series in (
    ('Kampala actual', Kampala, 'Kampala projected', proj_k),
    ('Wakiso actual', Wakiso, 'Wakiso projected', proj_w),
    ('Gulu actual', Gulu, 'Gulu projected', proj_g),
):
    plt.plot(years_actual, observed_series, marker='o', label=actual_label)
    plt.plot(years_proj, projected_series, marker='x', linestyle='--', label=projected_label)
plt.xlabel('Year index')
plt.ylabel('Population (thousands)')
plt.title('Actual vs Projected Population')
plt.legend()
plt.grid(True)
plt.show()

**Question 2: Solar Micro-Grid Simulation**

In [None]:
# 2.1 Prompt the user to enter daily demand values D1 and D2.

# Hard-coded (D1, D2) pairs, one per day, used in place of interactive input.
example_demands = [
    (30, 20),
    (32, 22),
    (28, 18),
    (35, 25),
    (31, 21),
    (29, 19),
    (33, 23),
]

# 2.2 Use scipy.linalg.solve() to calculate energy from solar panels (x) and batteries (y).
# Equations: 3x + 2y = D1 and 4x + y = D2

if linalg is not None:
    solver = linalg.solve
else:
    print("scipy not available; falling back to numpy.linalg")
    from numpy.linalg import solve as nsolve
    solver = nsolve

# The coefficient matrix is identical for every day; only the demand vector changes.
A = np.array([[3, 2], [4, 1]], dtype=float)
results = np.array([
    solver(A, np.array(demand_pair, dtype=float))
    for demand_pair in example_demands
])
results


# 2.3 Repeat for 7 days, storing results in a NumPy array (done above).
# Column 0 of the solution matrix is x (solar), column 1 is y (battery).
solar, battery = results[:, 0], results[:, 1]

# 2.4 Use statistics to compute mean, variance, and standard deviation of solar vs battery usage.
# Variance and standard deviation are the population versions.
stats_summary = dict(
    solar_mean=stats.mean(solar),
    solar_var=stats.pvariance(solar),
    solar_stdev=stats.pstdev(solar),
    battery_mean=stats.mean(battery),
    battery_var=stats.pvariance(battery),
    battery_stdev=stats.pstdev(battery),
)
stats_summary


# 2.5 Plot daily solar vs battery usage.
plt.figure(figsize=(8, 5))
# Both series are drawn against 1-based day numbers, solar first.
for usage, point_marker, legend_text in (
    (solar, 'o', 'solar (x)'),
    (battery, 'x', 'battery (y)'),
):
    plt.plot(np.arange(1, len(usage) + 1), usage, marker=point_marker, label=legend_text)
plt.xlabel('Day')
plt.ylabel('Units')
plt.title('Daily Solar vs Battery Usage')
plt.legend()
plt.grid(True)
plt.show()


**Question 3: Lake Victoria Fish Export Risk Model**

In [None]:
# 3.1 Generate 15 Fibonacci numbers to simulate fish stock growth.

fib15 = fib(15)

# Scale the Fibonacci terms by 100 so they read as stock figures in kg (illustrative).

stock = np.array(fib15) * 100

# 3.2 Simulate fish prices in UGX per kg (15 values). We'll create an example series.
prices = np.array([12000, 12500, 11800, 13000, 12800, 12700, 13100, 12900, 13500, 13300, 13200, 13050, 12850, 12950, 13150])

# 3.3 Multiply stock × price to estimate daily revenue.
revenue = stock * prices

# 3.4 Compute mean, median, variance of revenue using statistics.
# statistics derives its result type from the data, so NumPy integer scalars
# make it cast fractional results back to a NumPy integer. Hand it native
# Python ints instead so the mean and variance keep their fractional part.
revenue_values = revenue.tolist()
revenue_stats = {
    'mean': stats.mean(revenue_values),
    'median': stats.median(revenue_values),
    'variance': stats.pvariance(revenue_values),  # population variance
}

# 3.5 Print 'High risk' if variance > 50,000, else 'Low risk'.
risk_label = 'High risk' if revenue_stats['variance'] > 50000 else 'Low risk'

revenue_stats, risk_label


# 3.6 Plot fish stock vs revenue trend.
plt.figure(figsize=(10, 5))
# Revenue is divided by one million so both lines share one axis; stock is drawn first.
for trend, legend_text, point_marker in (
    (stock, 'Fish stock (kg)', 'o'),
    (revenue / 1e6, 'Revenue (million UGX)', 'x'),
):
    plt.plot(trend, label=legend_text, marker=point_marker)
plt.xlabel('Day index')
plt.title('Fish Stock vs Revenue')
plt.legend()
plt.grid(True)
plt.show()


**Question 4: Weather & Agriculture Analysis**

In [None]:


# Question 4: Weather & Agriculture Analysis


# 4.1 Store rainfall data in a NumPy array.
rain_kampala = np.array([120, 140, 180, 200, 220, 180, 90, 70, 60, 100, 110, 130])

# 4.2 Compute mean rainfall and classify months as 'Good for maize' (>150mm) or 'Drought risk' (<=150mm).
# statistics.mean derives its result type from the data: given NumPy integers
# it casts the true mean (1600 / 12) back to an integer. Passing native Python
# ints keeps the fractional part.
mean_rain_k = stats.mean(rain_kampala.tolist())
classification = ['Good for maize' if v > 150 else 'Drought risk' for v in rain_kampala]

mean_rain_k, classification


# 4.3 Simulate Gulu rainfall with different values (example)
rain_gulu = np.array([100, 120, 160, 180, 210, 170, 95, 80, 70, 105, 115, 125])

# 4.4 Compute cosine similarity of Kampala vs Gulu rainfall trends using math.cos().

# Cosine similarity = (A·B) / (|A||B|)
import math

def cosine_similarity(a, b):
    """Return the cosine of the angle between vectors ``a`` and ``b``.

    Computed as ``(a . b) / (|a| * |b|)``. When either vector has zero
    length the ratio is undefined and 0.0 is returned instead.
    """
    inner_product = float(np.dot(a, b))
    length_a, length_b = (
        math.sqrt(float(np.dot(vec, vec))) for vec in (a, b)
    )
    if 0 in (length_a, length_b):
        return 0.0
    return inner_product / (length_a * length_b)

# Similarity of the two monthly rainfall profiles; the bare name echoes it in the notebook.
cos_sim = cosine_similarity(rain_kampala, rain_gulu)
cos_sim

# %%
# 4.5 Plot both regions' rainfall in one chart.
plt.figure(figsize=(10, 5))
# Kampala is drawn first so colours and legend order match the data order.
for monthly_rain, point_marker, region in (
    (rain_kampala, 'o', 'Kampala'),
    (rain_gulu, 'x', 'Gulu'),
):
    plt.plot(monthly_rain, marker=point_marker, label=region)
plt.xlabel('Month index')
plt.ylabel('Rainfall (mm)')
plt.title('Rainfall: Kampala vs Gulu')
plt.legend()
plt.grid(True)
plt.show()


**Question 5: Taxi Transport Revenue & Variability**

In [None]:
# 5.1 Store passenger counts in a NumPy array.

passengers = np.array([35, 40, 42, 50, 55, 60, 48, 52, 47, 45])

# 5.2 Compute daily revenues at UGX 2000 per passenger.
fare = 2000
revenues = passengers * fare

# 5.3 Use statistics to analyze mean, variance, and std deviation.
# statistics derives its result type from the data, so it is given native
# Python ints; NumPy integer scalars would make it cast results back to a
# NumPy integer. Variance and standard deviation are the population versions.
revenue_values = revenues.tolist()
revenue_stats_taxi = {
    'mean': stats.mean(revenue_values),
    'variance': stats.pvariance(revenue_values),
    'stdev': stats.pstdev(revenue_values),
}

# 5.4 Use scipy.linalg.solve to model a simple supply-demand system.

A = np.array([[1, -0.1], [-0.2, 1]], dtype=float)  # toy coefficients
b = np.array([np.mean(passengers), np.mean(revenues) / fare], dtype=float)
# Pick the solver here instead of relying on a name that another cell defines
# only on its fallback path; numpy.linalg.solve is the stand-in without SciPy.
solve_linear_system = linalg.solve if linalg is not None else np.linalg.solve
try:
    model_sol = solve_linear_system(A, b)
except np.linalg.LinAlgError as err:
    # Best effort: a singular system leaves the model unsolved, but say so
    # rather than hiding the reason.
    print(f"Could not solve the supply-demand system: {err}")
    model_sol = None

# 5.5 Forecast 11th day's revenue using average of last 3 days.
forecast_11 = int(round(np.mean(revenues[-3:])))

# 5.6 Plot actual vs forecasted revenues.
day_numbers = np.arange(1, len(revenues) + 1)  # observed days, numbered from 1

plt.figure(figsize=(9, 5))
plt.plot(day_numbers, revenues, label='Actual revenues', marker='o')
# The forecast is a single point placed at day 11.
plt.scatter([11], [forecast_11], label='Forecast day 11', color='red')
plt.xlabel('Day')
plt.ylabel('Revenue (UGX)')
plt.title('Actual vs Forecasted Revenue')
plt.legend()
plt.grid(True)
plt.show()

# Bare tuple so the notebook echoes the statistics, model solution and forecast.
revenue_stats_taxi, model_sol, forecast_11
