# Tender Data Analysis
A Jupyter notebook to analyze procurement and tender data from the `gare_easy_export_20260121.csv` file, focusing on categories, timelines, and contracting authorities.

## 1. Setup and Data Loading
Import necessary libraries and load the dataset.

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime

# Set visualization style
sns.set_theme(style="whitegrid")
plt.rcParams['figure.figsize'] = [10, 6]

# Load the dataset
# Adjust the path if the file is located elsewhere
file_path = 'gare_easy_export_20260121.csv'

# Keep the try body to the single call that can raise; report success only
# when the read actually completed.
try:
    df = pd.read_csv(file_path)
except FileNotFoundError:
    print(f"File not found at {file_path}. Please check the file path.")
else:
    print("Dataset loaded successfully.")

# Display first few rows and data types
if 'df' in locals():
    # DataFrame.info() prints its summary itself and returns None, so wrapping
    # it in print() would emit a stray "None" line after the summary.
    df.info()
    display(df.head())

## 2. Data Cleaning and Preprocessing
Convert date columns to datetime objects and handle missing values.

In [None]:
if 'df' in locals():
    # Date-like columns to parse; only those present in the frame are touched.
    date_cols = ['Publication date', 'Deadline', 'Last update date', 'Evaluation date']

    # errors='coerce' turns unparseable entries into NaT instead of raising.
    for col in (name for name in date_cols if name in df.columns):
        df[col] = pd.to_datetime(df[col], errors='coerce')

    # Label columns whose gaps are replaced by the placeholder 'Unknown'
    # (tender category first, then contracting authority).
    for label_col in ('Tender category', 'Contracting authority (SA)'):
        if label_col in df.columns:
            df[label_col] = df[label_col].fillna('Unknown')

    # Report the resulting dtypes
    print("Data types after conversion:")
    print(df.dtypes)

    # Report how many values are still missing in each column
    print("\nMissing values per column:")
    print(df.isna().sum())

## 3. Tender Category Analysis
Analyze the distribution of tenders by category.

In [None]:
if 'df' in locals():
    # Guard on the column like the cleaning and geographic cells do, so a
    # dataset without this column is reported instead of raising KeyError.
    if 'Tender category' in df.columns:
        plt.figure(figsize=(10, 6))
        # Order the bars from most to least frequent category
        category_order = df['Tender category'].value_counts().index
        ax = sns.countplot(data=df, x='Tender category', order=category_order)
        plt.title('Distribution of Tenders by Category')
        plt.xlabel('Category')
        plt.ylabel('Count')
        plt.xticks(rotation=45)

        # Add a value label on top of every bar
        for container in ax.containers:
            ax.bar_label(container)

        plt.tight_layout()
        plt.show()
    else:
        print("Column 'Tender category' not found; skipping category plot.")

## 4. Temporal Analysis of Tenders
Analyze tender durations and upcoming deadlines.

In [None]:
if 'df' in locals():
    # NOTE(review): both sections below rely on the date columns already being
    # datetime-typed (the .dt accessor is used); the cleaning cell does that
    # conversion - confirm it has been run first.

    # --- Tender duration (publication -> deadline) ---
    # Needs both date columns; report and skip instead of raising KeyError
    # when either one is absent.
    if {'Publication date', 'Deadline'}.issubset(df.columns):
        # Calculate duration where both dates are available
        valid_dates = df.dropna(subset=['Publication date', 'Deadline']).copy()
        valid_dates['Duration (Days)'] = (
            valid_dates['Deadline'] - valid_dates['Publication date']
        ).dt.days

        if not valid_dates.empty:
            plt.figure(figsize=(10, 6))
            sns.histplot(valid_dates['Duration (Days)'], bins=20, kde=True)
            plt.title('Distribution of Tender Duration (Publication to Deadline)')
            plt.xlabel('Days')
            plt.ylabel('Frequency')
            plt.show()
    else:
        print("Skipping duration histogram: 'Publication date' and/or 'Deadline' column not found.")

    # --- Upcoming deadlines per month ---
    # Only the 'Deadline' column is required for this plot.
    if 'Deadline' in df.columns:
        # Rows with a missing deadline (NaT) compare False and are excluded.
        upcoming = df[df['Deadline'] > datetime.now()].copy()
        if not upcoming.empty:
            # 'YYYY-MM' strings sort lexicographically in chronological order
            upcoming['Deadline Month'] = upcoming['Deadline'].dt.to_period('M').astype(str)
            upcoming_counts = upcoming['Deadline Month'].value_counts().sort_index()

            plt.figure(figsize=(12, 6))
            sns.barplot(x=upcoming_counts.index, y=upcoming_counts.values)
            plt.title('Upcoming Tender Deadlines by Month')
            plt.xlabel('Month')
            plt.ylabel('Number of Tenders')
            plt.xticks(rotation=45)
            plt.tight_layout()
            plt.show()
    else:
        print("Skipping upcoming deadlines plot: 'Deadline' column not found.")

## 5. Geographic Distribution of Tenders
Analyze the location of tender execution.

In [None]:
if 'df' in locals() and 'Place of execution' in df.columns:
    place_col = 'Place of execution'

    # Trim surrounding whitespace from the place names (written back to df)
    df[place_col] = df[place_col].str.strip()

    # Ten most frequent places of execution
    top_locations = df[place_col].value_counts().iloc[:10]

    if top_locations.empty:
        print("No valid place of execution data found.")
    else:
        # Pie chart of the ten most frequent places; percentages are
        # relative to those ten only.
        plt.figure(figsize=(12, 6))
        plt.pie(
            top_locations.values,
            labels=top_locations.index,
            autopct='%1.1f%%',
            startangle=140,
        )
        plt.title('Top 10 Places of Execution')
        plt.axis('equal')
        plt.show()

## 6. Contracting Authority Analysis
Identify the most active contracting authorities.

In [None]:
if 'df' in locals():
    authority_col = 'Contracting authority (SA)'

    # Stays None when the column is absent, so a dataset without it falls
    # through to the "no data" message instead of raising KeyError.
    authorities = None
    if authority_col in df.columns:
        # Exclude the 'Unknown' placeholder so it does not show up as an
        # authority; keep the ten most frequent names.
        known_authorities = df.loc[df[authority_col] != 'Unknown', authority_col]
        authorities = known_authorities.value_counts().head(10)

    if authorities is not None and not authorities.empty:
        plt.figure(figsize=(12, 8))
        sns.barplot(y=authorities.index, x=authorities.values, orient='h')
        plt.title('Top 10 Contracting Authorities')
        plt.xlabel('Number of Tenders')
        plt.ylabel('Authority')
        plt.tight_layout()
        plt.show()
    else:
        print("No contracting authority data found.")