# Crop Data Analysis

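This notebook analyzes crop market history and crop characteristics data, with visualizations throughout. It covers:
1. Data Loading and Preprocessing
2. Exploratory Data Analysis (price and volume statistics, price trends)
3. Statistical Analysis (price-volume correlation per crop)
4. Crop Characteristics Analysis (temperature ranges and growth periods)
5. Conclusions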

In [None]:
# Import required libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
import json

# Set plotting style.
# Matplotlib 3.6 renamed its bundled seaborn style sheets to "seaborn-v0_8*"
# and 3.8 removed the old names, so plt.style.use('seaborn') raises OSError
# on current releases. Prefer the new name and fall back to the legacy one
# so the notebook runs on both old and new Matplotlib versions.
seaborn_style = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'
plt.style.use(seaborn_style)
sns.set_palette("husl")

# Data Loading and Preprocessing

First, we'll load the crop market history data and crop characteristics data from our data files.

In [None]:
# Load market history data. Later cells read its 'crop', 'date',
# 'price_per_ton' and 'volume_traded' columns.
market_history = pd.read_csv('../data/crop_market_history.csv')

# Load crop characteristics data (a dict keyed by crop name).
# The encoding is given explicitly: without it open() falls back to the
# platform's locale encoding, which can mis-decode or reject non-ASCII
# characters in the JSON file on systems whose default is not UTF-8.
with open('../data/sample_data.json', 'r', encoding='utf-8') as f:
    crop_data = json.load(f)

# Quick sanity check of what was loaded
print("Market History Data Shape:", market_history.shape)
print("\nMarket History Columns:", market_history.columns.tolist())
print("\nCrops in characteristics data:", list(crop_data.keys()))

# Exploratory Data Analysis

Let's analyze the market history data to understand price trends and trading patterns.

In [None]:
# Per-crop describe() summaries of price and traded volume.
# Both summaries share one groupby object keyed on the crop name.
by_crop = market_history.groupby('crop')
price_stats = by_crop['price_per_ton'].describe()
volume_stats = by_crop['volume_traded'].describe()

# sep="\n" puts each table on the line after its heading
print("Price Statistics (per ton):", price_stats, sep="\n")
print("\nVolume Statistics:", volume_stats, sep="\n")

In [None]:
# Plot price trends: one line per crop, price per ton against date.
plt.figure(figsize=(12, 6))
for crop in market_history['crop'].unique():
    # Named crop_history, NOT crop_data: crop_data is the characteristics
    # dict loaded from JSON, and the crop-characteristics cells further down
    # still need it. Reusing that name here replaced the dict with a
    # DataFrame and broke those cells on a top-to-bottom run.
    crop_history = market_history[market_history['crop'] == crop]
    plt.plot(
        pd.to_datetime(crop_history['date']),  # parse dates so the x-axis is a time axis
        crop_history['price_per_ton'],
        label=crop,
        marker='o',
    )

plt.title('Crop Price Trends')
plt.xlabel('Date')
plt.ylabel('Price per Ton')
plt.legend()
plt.xticks(rotation=45)
plt.grid(True)
plt.tight_layout()
plt.show()

# Statistical Analysis

Let's perform some statistical analysis on the price and volume data.

In [None]:
# Calculate the Pearson correlation between price and traded volume, per crop.
# Result: {crop: {'correlation': r, 'p_value': p}}
correlations = {}
for crop in market_history['crop'].unique():
    # Named crop_history, NOT crop_data: crop_data is the characteristics
    # dict loaded from JSON and is read again by the crop-characteristics
    # cells below. Overwriting it here made those cells fail.
    crop_history = market_history[market_history['crop'] == crop]
    # pearsonr returns (coefficient, p-value); unpacking works for both the
    # plain tuple of older SciPy and the result object of newer releases.
    coefficient, p_value = stats.pearsonr(
        crop_history['price_per_ton'], crop_history['volume_traded']
    )
    correlations[crop] = {'correlation': coefficient, 'p_value': p_value}

print("Price-Volume Correlations:")
for crop, stats_data in correlations.items():
    print(f"\n{crop}:")
    print(f"Correlation coefficient: {stats_data['correlation']:.4f}")
    print(f"P-value: {stats_data['p_value']:.4f}")

# Crop Characteristics Analysis

Now let's analyze the characteristics of different crops from our sample data.

In [None]:
# Pull the min/max temperature of every crop out of the characteristics dict
temp_ranges = {
    crop: dict(
        min_temp=data['temperature_range']['min'],
        max_temp=data['temperature_range']['max'],
    )
    for crop, data in crop_data.items()
}

# Parallel lists for plotting, all in the iteration order of temp_ranges
crops = list(temp_ranges)
min_temps = [limits['min_temp'] for limits in temp_ranges.values()]
max_temps = [limits['max_temp'] for limits in temp_ranges.values()]
y_positions = range(len(crops))  # one horizontal row per crop

# Draw a translucent bar from min to max for each crop, then mark both ends
plt.figure(figsize=(10, 6))
plt.hlines(y=y_positions, xmin=min_temps, xmax=max_temps, color='blue', alpha=0.4, linewidth=8)
plt.plot(min_temps, y_positions, 'o', color='blue', label='Minimum Temperature')
plt.plot(max_temps, y_positions, 'o', color='red', label='Maximum Temperature')

# Label each row with its crop name
plt.yticks(y_positions, crops)
plt.xlabel('Temperature (°C)')
plt.title('Temperature Ranges for Different Crops')
plt.legend()
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()

In [None]:
# Summarize each crop's growth period: its total days and how many stages it lists
growth_data = {
    crop: dict(
        total_days=data['growth_period']['days'],
        stages=len(data['growth_period']['stages']),
    )
    for crop, data in crop_data.items()
}

# The dict-of-dicts yields one column per crop, so transpose to get one row
# per crop. Column labels are assigned by position and follow the key order
# used above (total_days, stages).
growth_df = pd.DataFrame(growth_data).T
growth_df.columns = ['Total Days', 'Number of Stages']

# Bar chart comparing total growth days across crops
plt.figure(figsize=(10, 6))
total_days = growth_df['Total Days']
ax = total_days.plot.bar()
plt.title('Growth Period Comparison')
plt.xlabel('Crop')
plt.ylabel('Days to Maturity')
plt.xticks(rotation=45)

# Write each bar's value just above its top edge, horizontally centered
for bar_index, days in enumerate(total_days):
    ax.text(bar_index, days, str(days), ha='center', va='bottom')

plt.tight_layout()
plt.show()

# Conclusions

Based on our analysis:
1. Price trends show variations across different crops
2. Price and trading volume were tested for correlation per crop; check the coefficients and p-values above before treating any relationship as significant
3. Each crop has specific temperature requirements and growth periods
4. Market dynamics may also differ by location and season, which this notebook does not analyze