# Data Exploration

This notebook explores the gesture data and dictionaries used for training the swipe keyboard model.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path

%matplotlib inline

## Load Dictionaries

In [None]:
# Load one word list per language from the dictionaries directory.
# Each file is read as one word per line; the result maps a language code
# to its list of words.
data_dir = Path('../data/dictionaries')

languages = ['en_US', 'es_ES', 'fr_FR']
dictionaries = {}

for lang in languages:
    # Decode explicitly instead of relying on the platform default encoding,
    # which would make non-ASCII words load differently (or fail) per machine.
    # NOTE(review): assumes the dictionary files are UTF-8 encoded - confirm.
    with open(data_dir / f'{lang}.txt', 'r', encoding='utf-8') as f:
        # Iterate the file directly (no intermediate readlines() list) and
        # drop blank lines so they are not counted as zero-length words.
        stripped = (line.strip() for line in f)
        dictionaries[lang] = [word for word in stripped if word]
    print(f'{lang}: {len(dictionaries[lang])} words')

## Analyze Word Distributions

In [None]:
# Plot a word-length histogram for every loaded language and print the
# mean and standard deviation of the word lengths.
for lang, words in dictionaries.items():
    lengths = list(map(len, words))

    # One figure with a single axes per language (object-oriented API).
    fig, ax = plt.subplots(figsize=(10, 4))
    ax.hist(lengths, bins=20, alpha=0.7)
    ax.set_title(f'Word Length Distribution - {lang}')
    ax.set_xlabel('Word Length')
    ax.set_ylabel('Frequency')
    plt.show()

    # Summary statistics are printed after the figure has been shown.
    mean_length = np.mean(lengths)
    std_length = np.std(lengths)
    print(f'{lang} - Mean length: {mean_length:.2f}, Std: {std_length:.2f}')