In [None]:
# Read the data and collect all the necessary information 

In [5]:
import pandas as pd  
import re  
import matplotlib.pyplot as plt  
import seaborn as sns  

# Inspect the column headers of mgdp.csv (metric types and industry sectors)
def analyze_mgdp_columns(file_path='mgdp.csv'):
    """Summarise the metric types and industry sectors encoded in CSV column headers.

    Every header is scanned with two regular expressions: the text inside the
    first pair of parentheses is recorded as a metric, and the text before the
    first ``(`` is recorded as an industry sector. Headers without parentheses
    contribute nothing, and a header containing "Title" is never recorded as
    an industry.

    Parameters
    ----------
    file_path : str or path-like, default 'mgdp.csv'
        CSV file to inspect; it is read as UTF-8.

    Returns
    -------
    tuple
        ``(df, unique_metrics, unique_industries)``: the loaded DataFrame and
        two sorted lists of distinct strings. When the file cannot be read, a
        message is printed and ``(None, None, None)`` is returned.
    """
    # Only the read is guarded, so the "Error reading the file" message can
    # never be printed for an unrelated failure in the analysis below.
    try:
        df = pd.read_csv(file_path, encoding='utf-8')
    except Exception as e:
        print(f"Error reading the file: {e}")
        return None, None, None

    # Basic information
    print("File basic information:")
    print(f"Rows: {df.shape[0]}, Columns: {df.shape[1]}")
    print("\nPreview of the first 5 rows:")
    print(df.head())

    # Tally metrics in a single pass instead of calling list.count() once per
    # unique metric.
    metric_counts = {}
    industries = set()

    for col in df.columns:
        metric_match = re.search(r'\((.*?)\)', col)
        if metric_match:
            metric_name = metric_match.group(1)
            metric_counts[metric_name] = metric_counts.get(metric_name, 0) + 1

        # Extract industry sectors, usually the part before the parentheses
        industry_match = re.search(r'^(.*?)\s*\(', col)
        if industry_match and "Title" not in col:
            industries.add(industry_match.group(1).strip())

    # Statistics and analysis
    total_metrics = sum(metric_counts.values())
    unique_metrics = sorted(metric_counts)
    unique_industries = sorted(industries)

    print("\nMetric type analysis:")
    print(f"There are {len(unique_metrics)} different types of metrics:")
    for i, metric in enumerate(unique_metrics, 1):
        print(f"{i}. {metric}")
        count = metric_counts[metric]
        # total_metrics is non-zero whenever this loop body runs.
        print(f"   Occurrence count: {count} ({count/total_metrics*100:.1f}%)")

    print("\nIndustry sector analysis:")
    print(f"There are {len(unique_industries)} different industry sectors:")
    for i, industry in enumerate(unique_industries, 1):
        print(f"{i}. {industry}")

    # Analysis of metric and industry combinations
    print("\nStandardized forms of metric types:")
    metric_mapping = {
        'Index 1dp': 'Index',
        'period on period growth': 'MoM Growth',
        'period on period 1 year ago growth': 'YoY Growth',
        '3 month on 3 month growth': 'QoQ Growth',
        '3m on 3m 1 year ago growth': 'QoQ YoY Growth',
        'period on period contribution': 'MoM Contribution',
        'period on period 1 year ago contribution': 'YoY Contribution',
        '3 month on 3 month contribution': 'QoQ Contribution',
        '3m on 3m 1 year ago contribution': 'QoQ YoY Contribution'
    }

    # Only mappings whose original label actually occurs in the file are shown.
    for original, simplified in metric_mapping.items():
        if original in metric_counts:
            print(f"{original} → {simplified}")

    return df, unique_metrics, unique_industries

# Execute the analysis
# Unpacks (DataFrame, sorted metric names, sorted industry names); all three are
# None when the function's except branch handled an error.
df, metrics, industries = analyze_mgdp_columns()

File basic information:
Rows: 109, Columns: 208

Preview of the first 5 rows:
      Title Gross Value Added - Monthly (Index 1dp) :CVM SA  \
0      CDID                                            ECY2   
1  2016 JAN                                            91.2   
2  2016 FEB                                            91.4   
3  2016 MAR                                            91.6   
4  2016 APR                                            92.1   

  Agriculture, Forestry and Fishing (Index 1dp) :CVM SA  \
0                                               ECY3      
1                                               90.6      
2                                                 88      
3                                               87.3      
4                                               87.5      

  Production Industries - Total (Index 1dp) :CVM SA  \
0                                              ECY4   
1                                              99.3   
2                      

In [None]:
# Data cleaning and reclassification


In [None]:
# General analysis

In [8]:
import pandas as pd  
import re  

def analyze_file(file_path):
    """Report which metrics a CSV file contains and how often each occurs.

    The first five rows are loaded and printed. If they expose a ``Metric``
    column, the whole file is loaded and the values of that column are
    counted; otherwise the text inside the first parentheses of each column
    name is counted instead.

    Returns ``(df_preview, unique_metrics)``: the five-row preview frame (not
    the full dataset) and a ``value_counts`` Series. On any exception a
    message is printed and ``(None, None)`` is returned.
    """
    try:
        # A small sample is enough to learn the layout of the file.
        df_preview = pd.read_csv(file_path, nrows=5)
        print("File preview (first 5 rows):")
        print(df_preview.head())
        column_names = df_preview.columns.tolist()
        print(f"\nColumns in the file: {column_names}")

        has_metric_column = 'Metric' in df_preview.columns
        if has_metric_column:
            print("\nDetected `Metric` column — analyzing...")
            # Counting needs every row, so the complete file is read here.
            full_frame = pd.read_csv(file_path)
            unique_metrics = full_frame['Metric'].value_counts()
            print(f"\nFound {len(unique_metrics)} unique metrics in the `Metric` column:")
        else:
            print("\nNo `Metric` column detected — analyzing column names...")
            # Metrics are embedded in the headers as "... (<metric>) ...".
            header_hits = (re.search(r'\((.*?)\)', name) for name in df_preview.columns)
            embedded = [hit.group(1) for hit in header_hits if hit]
            unique_metrics = pd.Series(embedded).value_counts()
            print(f"\nFound {len(unique_metrics)} unique metrics in column names:")

        # Both branches finish with the same numbered listing.
        position = 0
        for metric, count in unique_metrics.items():
            position += 1
            print(f"{position}. {metric} - {count} occurrences")

        return df_preview, unique_metrics

    except Exception as e:
        print(f"Error reading the file: {e}")
        return None, None

# Provide the file path
file_path = "uk_gdp_clean.csv"
# NOTE: analyze_file returns its 5-row preview frame, not the full dataset, so
# `df` is rebound here to that preview (replacing the frame bound by the earlier
# analyze_mgdp_columns() call) and `metrics` to the value-counts Series.
df, metrics = analyze_file(file_path)

File preview (first 5 rows):
         Date                     Industry Metric  Value
0  01/01/2016  Gross Value Added - Monthly  Index   91.2
1  01/02/2016  Gross Value Added - Monthly  Index   91.4
2  01/03/2016  Gross Value Added - Monthly  Index   91.6
3  01/04/2016  Gross Value Added - Monthly  Index   92.1
4  01/05/2016  Gross Value Added - Monthly  Index   91.9

Columns in the file: ['Date', 'Industry', 'Metric', 'Value']

Detected `Metric` column — analyzing...

Found 8 unique metrics in the `Metric` column:
1. Index - 5832 occurrences
2. MoM Growth - 2376 occurrences
3. QoQ Growth - 2376 occurrences
4. QoQ YoY Growth - 2376 occurrences
5. QoQ Contribution - 2376 occurrences
6. MoM Contribution - 2376 occurrences
7. QoQ YoY Contribution - 2376 occurrences
8. YoY Growth - 2268 occurrences
