# In [None]:
import pandas as pd
import numpy as np

def read_csv_and_preprocess(file_path, column_names):
    """
    Load a CSV into a DataFrame, parse its timestamps, and label each row's trend.

    The first line of the file is skipped and the supplied names are used as
    the column labels. The 'timestamp' column is interpreted as milliseconds
    and converted to datetime. A 'trend' column is added: 'raise' where
    close - open is strictly positive, 'drop' otherwise.

    :param file_path: Path to the CSV file
    :param column_names: List of column names for the DataFrame
    :return: DataFrame with the converted 'timestamp' and the added 'trend' column
    """
    raw = pd.read_csv(file_path, skiprows=1, header=None, names=column_names)

    parsed_timestamps = pd.to_datetime(raw['timestamp'], unit='ms')
    price_change = raw['close'] - raw['open']
    # A zero (or NaN) change is not "> 0", so it is labelled 'drop'.
    trend_labels = np.where(price_change > 0, 'raise', 'drop')

    return raw.assign(timestamp=parsed_timestamps, trend=trend_labels)

def find_similar_period_groups(df, period_length=24, similarity_threshold=0.7, top_n=3):
    """
    Finds groups of periods that are similar, listing from highest similarity to lowest.

    Every window of ``period_length`` consecutive rows is compared against every
    later, non-overlapping window. The similarity of two windows is the fraction
    of positions at which their 'trend' values are equal. Pairs whose similarity
    is strictly greater than the threshold are grouped by their (sorted) pair of
    starting timestamps and ranked by the group's average similarity.

    :param df: DataFrame containing the processed data ('timestamp' and 'trend' columns are read)
    :param period_length: Length of each period to analyze (must be positive)
    :param similarity_threshold: Threshold for considering periods similar (strict ">")
    :param top_n: Maximum number of groups to return; None returns every group
    :return: A list of ((timestamp_a, timestamp_b), average_similarity) tuples,
             sorted by average similarity in descending order
    :raises ValueError: If period_length is not positive
    """
    if period_length <= 0:
        raise ValueError("period_length must be a positive integer")

    # Extract the columns once: per-row df.iloc lookups inside the nested
    # loops below would dominate the running time.
    trends = df['trend'].tolist()
    timestamps = df['timestamp'].tolist()

    # The last full window starts at len - period_length, so the exclusive
    # upper bound for window starts is one past that.
    starts_end = len(trends) - period_length + 1

    # Maps a sorted (timestamp, timestamp) pair to the similarities seen for it.
    similarity_groups = {}
    for start in range(starts_end):
        current_period = trends[start:start + period_length]

        # Only compare against windows that begin after the current one ends.
        for compare_start in range(start + period_length, starts_end):
            compare_period = trends[compare_start:compare_start + period_length]

            matches = sum(t1 == t2 for t1, t2 in zip(current_period, compare_period))
            similarity = matches / period_length

            if similarity > similarity_threshold:
                key = tuple(sorted([timestamps[start], timestamps[compare_start]]))
                similarity_groups.setdefault(key, []).append(similarity)

    averaged = [(group, np.mean(similarities)) for group, similarities in similarity_groups.items()]
    # list.sort is stable even with reverse=True, so ties keep discovery order.
    averaged.sort(key=lambda item: item[1], reverse=True)
    return averaged[:top_n]

# Column names applied, in order, to the columns of the CSV
column_names = [
    'timestamp', 'open', 'high', 'low', 'close', 'volume',
    'timestamp_end', 'volume_currency', 'trades_count',
    'volume_buy', 'volume_buy_currency', 'ignored',
]

# Location of the CSV data (left empty here; set it before running)
file_path = ''

# Load the file and derive the per-row trend labels
df = read_csv_and_preprocess(file_path, column_names)

# Search for the three most similar pairs of 24-row periods
top_three_similar_period_groups = find_similar_period_groups(
    df,
    period_length=24,
    similarity_threshold=0.7,
)

# Report the results, best match first
print("Top 3 similar period groups (highest to lowest similarity):")
for i, (periods, avg_similarity) in enumerate(top_three_similar_period_groups, 1):
    period_times = ", ".join(map(str, periods))
    print(f"{i}. Periods: {period_times} with average similarity: {avg_similarity:.2f}")
