### summary_statistics

In [None]:
import pandas as pd

def calculate_summary_statistics(data):
    """
    Build, print, and return a descriptive summary of every column.

    The summary comes from ``data.describe(include='all')``, so numeric
    columns get count/mean/std/min/quartiles/max and non-numeric columns
    get count/unique/top/freq; cells that do not apply to a column are NaN.

    Args:
        data: pandas DataFrame to summarise.

    Returns:
        The DataFrame produced by ``describe`` (also printed to stdout).
    """
    summary = data.describe(include='all')
    print(summary)
    return summary

### data_quality_checks

In [None]:
def check_data_quality(data):
    """
    Print a basic data-quality report: missing values and negative values.

    First prints the number of null entries per column, then, for every
    integer or floating-point column, the number of values below zero.
    Non-numeric columns (strings, booleans, datetimes, timedeltas) are
    skipped by the negative-value check.

    Args:
        data: pandas DataFrame to inspect.

    Returns:
        None. The report is written to standard output.
    """
    print("Missing Values:\n", data.isnull().sum())
    print("\nNegative Values Check:")
    # The 'int'/'float' aliases resolve to specific fixed-width dtypes, so
    # narrower columns such as int8/int16 or float16 (and unsigned ints) were
    # skipped. 'integer'/'floating' cover every width. NumPy classes
    # timedelta64 under np.integer, so it is excluded explicitly: comparing a
    # timedelta column with 0 is not a valid numeric check.
    numeric_columns = data.select_dtypes(
        include=['integer', 'floating'],
        exclude=['timedelta'],
    )
    for col in numeric_columns:
        print(f"{col}: {(data[col] < 0).sum()} negative values")

### time_series_analysis


In [None]:
import matplotlib.pyplot as plt

def plot_time_series(data, columns, time_column='timestamp'):
    """
    Draw one line chart per requested column against the time column.

    Each column gets its own 10x6 figure with a title, axis labels and a
    legend, and each figure is shown before the next one is created.

    Args:
        data: table indexable by column name (e.g. a pandas DataFrame).
        columns: iterable of column names to plot on the y-axis.
        time_column: name of the column used for the x-axis.

    Returns:
        None. Figures are displayed via matplotlib.
    """
    for series_name in columns:
        _, ax = plt.subplots(figsize=(10, 6))
        ax.plot(data[time_column], data[series_name], label=series_name)
        ax.set_title(f"Time Series of {series_name}")
        ax.set_xlabel("Time")
        ax.set_ylabel(series_name)
        ax.legend()
        plt.show()

### correlation_analysis

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

def plot_correlation_matrix(data, columns):
    """
    Show an annotated heatmap of the correlations between the given columns.

    The correlation matrix is computed with ``DataFrame.corr()`` on
    ``data[columns]`` and rendered with seaborn on a 10x8 figure using the
    'coolwarm' colour map.

    Args:
        data: pandas DataFrame holding the columns.
        columns: list of column names to correlate.

    Returns:
        None. The figure is displayed via matplotlib.
    """
    corr_matrix = data[columns].corr()
    _, ax = plt.subplots(figsize=(10, 8))
    sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', ax=ax)
    ax.set_title("Correlation Matrix")
    plt.show()

### wind_temp_analysis

In [None]:
import matplotlib.pyplot as plt

def analyze_wind_and_temperature(data):
    """
    Scatter-plot wind speed against the two temperature columns.

    Reads the 'WS', 'TModA' and 'TModB' columns of ``data`` and overlays two
    semi-transparent scatter series (WS vs TModA, WS vs TModB) on a single
    8x6 figure with a legend.

    Args:
        data: table indexable by column name (e.g. a pandas DataFrame)
            containing 'WS', 'TModA' and 'TModB'.

    Returns:
        None. The figure is displayed via matplotlib.
    """
    # (temperature column, legend entry) for each overlaid series.
    temperature_series = (
        ('TModA', 'WS vs TModA'),
        ('TModB', 'WS vs TModB'),
    )

    plt.figure(figsize=(8, 6))
    for temp_column, legend_text in temperature_series:
        plt.scatter(data['WS'], data[temp_column], alpha=0.5, label=legend_text)
    plt.legend()
    plt.title("Wind Speed vs Temperature")
    plt.xlabel("Wind Speed (WS)")
    plt.ylabel("Temperature")
    plt.show()

### histograms_zscore

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import zscore

def plot_histograms(data, columns):
    """
    Draw a separate 20-bin histogram for each requested column.

    Every column gets its own default-sized figure, titled with the column
    name, and is shown before the next figure is created.

    Args:
        data: table indexable by column name (e.g. a pandas DataFrame).
        columns: iterable of column names to plot.

    Returns:
        None. Figures are displayed via matplotlib.
    """
    for column_name in columns:
        _, ax = plt.subplots()
        ax.hist(data[column_name], bins=20, alpha=0.7, label=column_name)
        ax.set_title(f"Histogram of {column_name}")
        ax.set_xlabel(column_name)
        ax.set_ylabel("Frequency")
        plt.show()

def calculate_z_scores(data, column):
    """
    Calculate Z-scores for a column, ignoring missing values.

    Uses ``scipy.stats.zscore`` (population standard deviation, ddof=0).
    Missing entries are left out of the mean and standard deviation and stay
    NaN in the result, so the output has one entry per input row.

    Args:
        data: table indexable by column name (e.g. a pandas DataFrame).
        column: name of the numeric column to standardise.

    Returns:
        Array-like of Z-scores aligned with ``data[column]``.
    """
    # With the default nan_policy='propagate', a single NaN in the column
    # turns every Z-score into NaN; 'omit' keeps the other rows usable.
    return zscore(data[column], nan_policy='omit')

### bubble_charts

In [None]:
import matplotlib.pyplot as plt

def plot_bubble_chart(data, x, y, size_col, color_col):
    """
    Show a bubble chart of ``y`` against ``x``.

    Marker area is ten times the value in ``size_col`` and marker colour is
    taken from ``color_col`` using the 'viridis' colour map; a colour bar
    labelled with ``color_col`` is added beside the plot.

    Args:
        data: table indexable by column name (e.g. a pandas DataFrame).
        x: column name for the horizontal axis.
        y: column name for the vertical axis.
        size_col: column name that drives bubble size.
        color_col: column name that drives bubble colour.

    Returns:
        None. The figure is displayed via matplotlib.
    """
    _, ax = plt.subplots(figsize=(10, 6))
    x_values = data[x]
    y_values = data[y]
    bubble_sizes = data[size_col] * 10
    bubble_colors = data[color_col]
    bubbles = ax.scatter(
        x_values,
        y_values,
        s=bubble_sizes,
        c=bubble_colors,
        alpha=0.5,
        cmap='viridis',
    )
    plt.colorbar(bubbles, ax=ax, label=color_col)
    ax.set_xlabel(x)
    ax.set_ylabel(y)
    ax.set_title(f"Bubble Chart: {x} vs {y} (size={size_col})")
    plt.show()

### data_cleaning

In [None]:
def clean_data(data):
    """
    Return a copy of the data with every row containing a null removed.

    The input object is not modified. A confirmation message is printed
    once the rows have been dropped.

    Args:
        data: pandas DataFrame (or Series) to clean.

    Returns:
        A new object holding only the rows without missing values.
    """
    cleaned = data.dropna()
    print("Null values dropped.")
    return cleaned

In [1]:
# Print the running interpreter's version string to record the environment.
import sys
print(sys.version)

3.9.21 (main, Dec  3 2024, 17:50:13) 
[Clang 16.0.0 (clang-1600.0.26.4)]
