# Setup video

In [None]:
from esper.widget import *
import cv2
import random
import pickle
from esper.table_tennis.utils import *
from esper.table_tennis.video_annotation import *
from esper.table_tennis.pose_utils import *

In [None]:
# Select the video to analyse by a substring of its path; [0] takes the first match.
video = Video.objects.filter(path__contains='men_single_quarter_final_2')[0]
video_id = video.id
# The same id wrapped in a one-element list.
video_ids = [video_id]
# Trailing expression: shown as the cell output in the notebook.
video.path

# Find frames with people (Pose)

In [None]:
# Frames of this video that carry the 'openpose:labeled' tag.
LABELED_TAG, _ = Tag.objects.get_or_create(name='openpose:labeled')
frames_with_pose = Frame.objects.filter(video_id=video_id, tags=LABELED_TAG)
num_frame_with_pose = len(frames_with_pose)
# Share of the video's frames that have a pose label, in percent.
pose_percentage = 100. * num_frame_with_pose / video.num_frames
print("{} frames contain poses out of {} frames ({:.02f}%)".format(
    num_frame_with_pose, video.num_frames, pose_percentage))

# Classify sport field Scene
Find long intervals that contain exactly two people, then build a scene classifier from RGB histogram features.

## Find frames with only two people

In [None]:
# get_two_people_intrvlcol returns a pair, unpacked here as a "short" and a "long" collection
# (going by the names; the split criterion lives in the helper — TODO confirm).
two_people_intrvlcol_short, two_people_intrvlcol_long = get_two_people_intrvlcol(video_id)

## Create montage 

In [None]:
# Flatten the match intervals of this video into a list, then render them three ways:
# a still montage, a supercut video and a video montage.
# (A URL fragment had been pasted in front of this assignment, which made the cell a syntax error.)
# NOTE(review): `match_intrvlcol` is not defined in the cells shown here — it must already exist
# in the session before this cell runs; confirm where it is built.
match_intervals = intrvlcol2list(match_intrvlcol.get_intervallist(video_id))
create_montage_from_intervals(match_intervals, out_path='/app/result/match_montage.jpg',
                              width=2160, num_cols=10)

create_video_supercut(match_intervals, '/app/result/match_supercut.mp4')

create_video_montage(match_intervals, '/app/result/match_montage.mp4',
                     width=2160, num_cols=10)

## Classify match scene

In [None]:
# Load the precomputed histogram features of this video from a raw binary file.
# Each row has 3*16 integer values (presumably 16 bins for each of 3 colour channels —
# TODO confirm against the code that writes the file); rows are indexed by frame id below.
with open('/app/data/histogram/{:07d}.bin'.format(video_id), 'rb') as f:
    # `np.int` was only an alias of the builtin `int` and was removed in NumPy 1.24;
    # passing `int` selects the same dtype.
    feature_all = np.frombuffer(f.read(), dtype=int).reshape((-1, 3*16))

In [None]:
# Flatten the long two-people collection into a list, asking the helper to include each
# interval's duration and to sort by it.
two_people_intervals_long = intrvlcol2list(two_people_intrvlcol_long, with_duration=True, sort_by_duration=True)

In [None]:
# NOTE: this cell does not work.
# Intended use: hand-pick candidate intervals from the long two-people intervals.
from esper.supercut import manual_select_candidates
manual_select_candidates(interval2result(two_people_intervals_long))

In [None]:
# Upper bound on the number of frames sampled for each class (positive and negative)
# when building the training set.
NUM_SAMPLE_TRAIN = 1000

In [None]:
# positive samples
# Indices into match_intervals of the intervals used as positive (match-scene) examples.
selected = [0, 2, 3, 7, 6, 5, 4, 8, 9, 10, 11, 15, 14, 13, 12, 16, 17, 18, 19, 23, 22, 21, 20, 24, 25, 26, 27, 31, 29, 32, 33, 34, 35, 39, 38, 36, 40, 41, 43, 47, 45, 49, 48]
# `idx` instead of `id`, so the builtin is not shadowed.
positive_intervals = [match_intervals[idx] for idx in selected]
num_sample = 0
POSITIVE_STRIDE = 5
# Alternative sampling (skip 5 frames at each end, take every POSITIVE_STRIDE-th frame):
# positive_fids_all = [fid for (vid, sfid, efid, duration) in positive_intervals
#                  for fid in range(sfid+5, efid-5, POSITIVE_STRIDE)]
# NOTE(review): the unpacking expects 4-tuples (vid, sfid, efid, duration) — confirm that
# match_intervals was built with durations included.
positive_fids_all = [fid for (vid, sfid, efid, duration) in positive_intervals
                     for fid in range(sfid, efid)]
print("Total positive samples ", len(positive_fids_all))
# Random subset of at most NUM_SAMPLE_TRAIN frames.
random.shuffle(positive_fids_all)
positive_fids = positive_fids_all[: NUM_SAMPLE_TRAIN]
positive_feature = [feature_all[fid] for fid in positive_fids]
# One label per sampled frame: when fewer than NUM_SAMPLE_TRAIN frames are available,
# a fixed-length label list would no longer match the feature list.
positive_label = [1] * len(positive_fids)

In [None]:
# obsolete
# Wrap each sampled positive frame as a single-frame interval (fid, fid, 0) and show them in the widget.
positive_frames = VideoIntervalCollection({video_id: IntervalList([(fid, fid, 0) for fid in positive_fids])})
esper_widget(intrvlcol2result(positive_frames, flat=True))

In [None]:
# Write a still montage of the sampled positive frames; each frame is passed as a
# one-frame interval (video_id, fid, fid).
create_montage_from_intervals([(video_id, fid, fid) for fid in positive_fids], out_path='/app/result/positive_montage.jpg',
                  width=2160, num_cols=35)

In [None]:
# negative samples
# Alternative (interval subtraction with a stride), kept for reference:
# NEGATIVE_STRIDE = 20
# video_intrvlcol = VideoIntervalCollection({video_id : IntervalList([(0, video.num_frames-1, 0)])})
# non_match_intrvlcol = video_intrvlcol.minus(two_people_intrvlcol_all)
# negative_intervals = intrvlcol2list(non_match_intrvlcol)
# negative_fids_all = [fid for (vid, sfid, efid, duration) in negative_intervals
#                     for fid in range(sfid+5, efid-5, NEGATIVE_STRIDE)]
# NOTE(review): `two_people_intrvlcol_all` is not defined in the cells shown here — confirm it
# exists in the session before running this cell.
# Every frame covered by any two-people interval (both ends inclusive).
positive_fids_large = [fid for interval in two_people_intrvlcol_all.get_intervallist(video_id).get_intervals()
                       for fid in range(interval.start, interval.end+1)]
# Membership is tested once per frame of the video, so look up in a set rather than the list.
positive_fid_set = set(positive_fids_large)
# Negative candidates: all remaining frames of the video.
negative_fids_all = [fid for fid in range(0, video.num_frames)
                     if fid not in positive_fid_set]
print("Total negative samples ", len(negative_fids_all))
# Random subset of at most NUM_SAMPLE_TRAIN frames.
random.shuffle(negative_fids_all)
negative_fids = negative_fids_all[: NUM_SAMPLE_TRAIN]
negative_feature = [feature_all[fid] for fid in negative_fids]
# One label per sampled frame, so labels and features always have the same length.
negative_label = [0] * len(negative_fids)

In [None]:
# obsolete
# Wrap each sampled negative frame as a single-frame interval (fid, fid, 0) and show them in the widget.
negative_frames = VideoIntervalCollection({video_id: IntervalList([(fid, fid, 0) for fid in negative_fids])})
esper_widget(intrvlcol2result(negative_frames, flat=True))

In [None]:
# Pose rows of this video whose frame number is one of the sampled negative frames.
# Note that this rebinds `negative_frames`.
negative_frames = Pose.objects.filter(frame__video_id=video_id, frame__number__in=negative_fids)
len(negative_frames)
# obsolete
esper_widget(qs_to_result(negative_frames, stride=1, limit=1000))

In [None]:
# Write a still montage of the sampled negative frames; each frame is passed as a
# one-frame interval (video_id, fid, fid).
create_montage_from_intervals([(video_id, fid, fid) for fid in negative_fids], out_path='/app/result/negative_montage.jpg',
                  width=2160, num_cols=35)

In [None]:
# Train the match-scene classifier: k-nearest-neighbours (k = 100) on the histogram features.
from sklearn.neighbors import KNeighborsClassifier

model = KNeighborsClassifier(n_neighbors=100)

# Training set: positive samples first, then negative samples, with labels in the same order.
train_features = positive_feature + negative_feature
train_labels = positive_label + negative_label
model.fit(train_features, train_labels)

In [None]:
# Predict output: classify every row of feature_all (rows are indexed by frame id elsewhere
# in this notebook), giving one label per frame.
predicted_label = model.predict(feature_all) 

In [None]:
# Persist the per-frame predictions as a plain list; the `with` block closes the file
# (and flushes the data) as soon as the dump finishes.
with open('/app/data/pkl/match_scene_cls.pkl', 'wb') as f:
    pickle.dump(list(predicted_label), f)

# Segment sport field scene into clip 
Two alternative algorithms are given below.

## Filter on two people intervals

In [None]:
# An interval is kept only if more than this fraction of its frames is classified as match scene.
MATCH_THRESH = 0.9
def filter_by_histogram(interval):
    """Tell whether enough of an interval's frames are classified as match scene.

    Reads the module-level per-frame labels ``predicted_label`` for the frames
    ``interval.start`` up to (not including) ``interval.end`` and returns True
    when the fraction of positive labels exceeds the module-level ``MATCH_THRESH``.
    """
    first_fid = interval.start
    last_fid = interval.end
    labels_in_interval = [predicted_label[fid] for fid in range(first_fid, last_fid)]
    match_fraction = 1. * np.sum(labels_in_interval) / (last_fid - first_fid)
    return match_fraction > MATCH_THRESH
# Keep only the two-people intervals accepted by filter_by_histogram, then count them.
# NOTE(review): `two_people_intrvlcol` is not defined in the cells shown here (only the
# _short / _long variants are) — confirm which collection is intended.
match_scene_intrvlcol = two_people_intrvlcol.filter(filter_by_histogram)
count_intervals(match_scene_intrvlcol)

In [None]:
# obsolete
# Show the filtered match-scene intervals in the widget.
esper_widget(intrvlcol2result(match_scene_intrvlcol, flat=True), use_jupyter_keybindings=True)

In [None]:
# Write a still montage of the filtered match-scene intervals (list form, without durations).
create_montage_from_intervals(intrvlcol2list(match_scene_intrvlcol, with_duration=False), out_path='/app/result/match_scene_montage.jpg',
                  width=2160, num_cols=10)

In [None]:
# Save the filtered match-scene collection; later cells reload it from this path.
# The `with` block closes the file as soon as the dump finishes.
with open('/app/result/match_scene_intrvlcol.pkl', 'wb') as f:
    pickle.dump(match_scene_intrvlcol, f)

## Merge from classified match scene

In [None]:
# Reload the per-frame match-scene labels; the `with` block closes the file after reading.
with open('/app/data/pkl/match_scene_cls.pkl', 'rb') as f:
    match_scene_cls = pickle.load(f)

In [None]:
# One interval [fid, fid+1] for every frame whose label is truthy.
intervals = [Interval(Bounds3D(fid, fid+1)) for fid, cls in enumerate(match_scene_cls) if cls]
# Dilate by 5, coalesce along the time axis, undo the dilation, then apply a minimum size of 25.
# (Presumably this bridges short gaps between positive runs and drops short runs —
# TODO confirm against the interval library's semantics.)
match_scene_is = (
    IntervalSet(intervals)
    .dilate(5)
    .coalesce(('t1', 't2'), Bounds3D.span)
    .dilate(-5)
    .filter_size(min_size=25)
)
match_scene_is.size()

In [None]:
# Print every element of the merged match-scene set together with its index.
# (Despite its name, `intervalSet` is one item yielded by get_intervals() per iteration.)
for idx, intervalSet in enumerate(match_scene_is.get_intervals()):
    print(idx, intervalSet)

In [None]:
# Key the interval set by its video id so it can be passed to the per-video helpers below.
match_scene_ism = IntervalSetMapping({video_id: match_scene_is})

In [None]:
# Report the duration covered by the match-scene mapping (shown as the cell output).
count_duration(match_scene_ism)

In [None]:
# Convert the mapping from frame units to seconds, then hand it to the vgrid viewer.
IntervalSetMapping_to_vgrid(IntervalSetMapping_frame_to_second(match_scene_ism), flat=True)

# Detect Hit

## Detect sound peak

In [None]:
# Load the hit annotations; the `with` block closes the file after reading.
with open('/app/data/pkl/hit_annotation.pkl', 'rb') as f:
    hit_dict_split = pickle.load(f)

In [None]:
# Take one annotated point (index 4 of the 'HW' entry); each element of it carries a 'fid'.
point = hit_dict_split['Tabletennis_2012_Olympics_men_single_final_gold']['HW'][4]
# Frame range from the first to the last hit, padded by 25 frames on both sides.
sfid, efid = point[0]['fid'], point[-1]['fid']
sfid -= 25
efid += 25
# Hit times in seconds, relative to the padded start frame.
# NOTE(review): the annotation key names the men's single final, while `video` was selected
# with 'men_single_quarter_final_2' — confirm that video.fps (and the audio extracted from
# `video`) belong to the annotated match.
hit_time = [(hit['fid'] - sfid) / video.fps for hit in point]
hit_time

In [None]:
# Export the audio track and the video clip of the padded point, both for the same time segment.
audio_path = '/app/tmp/test_audio.wav'
video_path = '/app/tmp/test_video.mp4'
# Frame ids converted to seconds.
segment = (1.*sfid/video.fps, 1.*efid/video.fps)
video.extract_audio(segment=segment, output_path=audio_path)
video.download(segment=segment, output_path=video_path)

In [None]:
# analyze sound
# Find peaks in the high-pass-filtered audio of the point and plot them against the annotated hits.
import matplotlib.pyplot as plt
from scipy.io import wavfile
from scipy.signal import find_peaks


fs, audio_signal = wavfile.read(audio_path)
# wavfile.read returns a 1-D array for mono files and (n_samples, n_channels) otherwise;
# collapse the channels (per-sample maximum) only when a channel axis exists.
if audio_signal.ndim > 1:
    audio_signal = audio_signal.max(axis=1)
# presumably a 1000 Hz cutoff — TODO confirm butter_highpass_filter's argument order
audio_signal_filter = butter_highpass_filter(audio_signal, 1000, fs)

# from peakdetect import peakdetect
# peaks = peakdetect(audio_signal, lookahead=100)

# from scipy.signal import find_peaks_cwt
# indexes = find_peaks_cwt(audio_signal, np.arange(len(audio_signal)))

# `distance` is in samples, so fs/20 keeps peaks at least 1/20 s apart.
peaks, _ = find_peaks(audio_signal_filter, prominence=1, distance=fs/20, height=500)

# Sample i is at i / fs seconds. (np.linspace with its default endpoint=True stretched the
# axis by n/(n-1), so the curve did not line up exactly with the `peaks / fs` markers below.)
audio_time = np.arange(len(audio_signal)) / fs

plt.figure()
plt.plot(audio_time, audio_signal_filter)
# Annotated hit times, drawn at a fixed height of 2000.
plt.scatter(hit_time, [2000] * len(hit_time), c='r', linewidths=2)

# Detected peaks.
plt.plot(peaks / fs, audio_signal_filter[peaks], 'gx', markersize=10)

plt.show()

# Convert peak sample indices back to (rounded) video frame ids.
peak_fid = [int(np.round(1. * p / fs * video.fps + sfid)) for p in peaks]
peak_fid

In [None]:
# Half-width, in samples, of the audio window taken around each detected peak.
window_size = 200

def get_frequency_spectrum(y, fs):
    """Return the one-sided amplitude spectrum of a signal.

    Args:
        y: 1-D sequence of samples.
        fs: sampling rate of ``y``.

    Returns:
        A pair ``(frq, amplitude)`` with ``len(y) // 2`` entries each: the
        frequency of every bin (bin ``k`` lies at ``k * fs / len(y)``) and the
        magnitude of the FFT of ``y`` normalised by ``len(y)``.
    """
    num_samples = len(y)
    half = num_samples // 2
    # Total duration of the signal; bin k corresponds to k / duration.
    duration = num_samples / fs
    frq = np.arange(num_samples)[:half] / duration
    # Normalised FFT, keeping only the first half of the bins.
    spectrum = np.fft.fft(y)[:half] / num_samples
    return frq, np.abs(spectrum)

# Overlay the amplitude spectra of the audio around the first (up to) 10 detected peaks.
plt.figure()
# Do not index past the end of `peaks` when fewer than 10 peaks were found.
for i in range(min(10, len(peaks))):
    # Clamp the window start at 0: a negative start would be read as an offset from the
    # end of the array and select the wrong (usually empty) slice.
    window_start = max(peaks[i] - window_size, 0)
    frq, Y = get_frequency_spectrum(audio_signal[window_start: peaks[i] + window_size], fs)
    plt.plot(frq, Y, label=str(i))
plt.xlim([0, 10000])
plt.ylim([0, 600])
plt.legend()
    
# frq, Y = get_frequency_spectrum(audio_signal[peaks[8] - window_size: peaks[8] + window_size], fs)
# plt.plot(frq, Y,'r') 
# frq, Y = get_frequency_spectrum(audio_signal[peaks[9] - window_size: peaks[9] + window_size], fs)
# plt.plot(frq, Y,'b') 
# frq, Y = get_frequency_spectrum(audio_signal[peaks[10] - window_size: peaks[10] + window_size], fs)
# plt.plot(frq, Y,'g') 
# frq, Y = get_frequency_spectrum(audio_signal[peaks[13] - window_size: peaks[13] + window_size], fs)
# plt.plot(frq, Y,'y') 

## Detect foot motion

In [None]:
# Reload the saved match-scene collection (the `with` block closes the file after reading)
# and flatten it into a list sorted by duration.
with open('/app/result/match_scene_intrvlcol.pkl', 'rb') as f:
    match_scene_intrvlcol = pickle.load(f)
match_scene_intervals = intrvlcol2list(match_scene_intrvlcol, sort_by_duration=True)

In [None]:
# Show the single interval at index 9 in the widget, with captions disabled.
esper_widget(interval2result(match_scene_intervals[9:10]), disable_caption=True)

In [None]:
# Elements 1 and 2 of the interval at index 9 are taken as its start / end frame ids.
# This rebinds sfid / efid from the sound-peak cells.
sfid, efid = match_scene_intervals[9][1:3]
# video.download(segment=(sfid/video.fps, efid/video.fps), output_path='/app/tmp/point.mp4')

In [None]:
# Joints whose trajectories are plotted, and their display names in the same order
# (the two lists are matched by position).
joints_to_plot = [Pose.Neck, Pose.LShoulder, Pose.LElbow, Pose.LWrist, Pose.LHip, Pose.LKnee, Pose.LAnkle]
joints_to_plot_name = ['Neck', 'LShoulder', 'LElbow', 'LWrist', 'LHip', 'LKnee', 'LAnkle']
def get_trajectory(fid2pose):
    """Collect per-joint coordinate series over time.

    Args:
        fid2pose: mapping from frame id to a pose object exposing
            ``_format_keypoints()``.

    Returns:
        ``(xtick_X, xtick_Y, trajectory_X, trajectory_Y)``: four dicts keyed by
        the joints in the module-level ``joints_to_plot``. ``trajectory_X`` /
        ``trajectory_Y`` hold the first / second keypoint coordinate of each
        joint, in ascending frame-id order, and ``xtick_X`` / ``xtick_Y`` hold
        the matching frame ids. Only coordinates strictly between 0 and 1 are
        kept, and the two coordinates are filtered independently.
    """
    xtick_X, xtick_Y = {}, {}
    trajectory_X, trajectory_Y = {}, {}
    for joint in joints_to_plot:
        xtick_X[joint] = []
        xtick_Y[joint] = []
        trajectory_X[joint] = []
        trajectory_Y[joint] = []

    for fid in sorted(fid2pose):
        keypoints = fid2pose[fid]._format_keypoints()
        for joint in joints_to_plot:
            x_coord = keypoints[joint][0]
            if 0 < x_coord < 1:
                trajectory_X[joint].append(x_coord)
                xtick_X[joint].append(fid)
            y_coord = keypoints[joint][1]
            if 0 < y_coord < 1:
                trajectory_Y[joint].append(y_coord)
                xtick_Y[joint].append(fid)
    return xtick_X, xtick_Y, trajectory_X, trajectory_Y

In [None]:
# Plot the joint trajectories of the foreground and then the background pose series,
# one figure each: X coordinates as solid lines, Y coordinates as dashed lines.
# NOTE: foreground_pose / background_pose come from group_pose_from_interval in the
# "Separate foreground/background" section, so that cell has to run before this one.
# (A stray `foreground_pose` token fused onto the last line made this cell a syntax error,
# and the module-level loop variable `id` overwrote the builtin of the same name.)
for pose_by_fid in (foreground_pose, background_pose):
    plt.figure()
    xtick_X, xtick_Y, trajectory_X, trajectory_Y = get_trajectory(pose_by_fid)
    for joint, joint_name in zip(joints_to_plot, joints_to_plot_name):
        plt.plot(xtick_X[joint], trajectory_X[joint], label=joint_name + 'X')
        plt.plot(xtick_Y[joint], trajectory_Y[joint], '--', label=joint_name + 'Y')
    legend = plt.legend(loc='lower right')

# Separate foreground/background

In [None]:
from esper.table_tennis.parse_match import *

In [None]:
# Reload the saved match-scene collection (the `with` block closes the file after reading)
# and flatten it into a list sorted by duration.
with open('/app/result/match_scene_intrvlcol.pkl', 'rb') as f:
    match_scene_intrvlcol = pickle.load(f)
match_scene_intervals = intrvlcol2list(match_scene_intrvlcol, sort_by_duration=True)

In [None]:
# Split the poses of the interval at index 9 into a foreground and a background group;
# this interval serves as the training example for the classifier below.
foreground_pose, background_pose = group_pose_from_interval(match_scene_intervals[9])

In [None]:
# Histogram features of the two groups; both are list-like, since they are concatenated
# with `+` and measured with len() when the model is trained.
hist_foreground_train = collect_histogram(foreground_pose)
hist_background_train = collect_histogram(background_pose)

In [None]:
# Train a k-nearest-neighbours classifier (k = 100) that tells the foreground group (label 1)
# from the background group (label 0) by histogram. This rebinds `model`.
from sklearn.neighbors import KNeighborsClassifier

model = KNeighborsClassifier(n_neighbors=100)

# Foreground samples first, then background samples, with labels in the same order.
train_features = hist_foreground_train + hist_background_train
train_labels = [1]*len(hist_foreground_train) + [0]*len(hist_background_train)
model.fit(train_features, train_labels)

In [None]:
# Sort every match-scene interval into one of two lists according to how the classifier
# labels a random sample of the interval's foreground poses.
with open('/app/result/match_scene_intrvlcol.pkl', 'rb') as f:
    match_scene_intrvlcol = pickle.load(f)
match_scene_intervals_foreground = []
match_scene_intervals_background = []
# Number of foreground poses sampled per interval.
num_sample = 50

for interval in match_scene_intervals:
    foreground_pose, background_pose = group_pose_from_interval(interval)
    # Skip intervals with too few foreground poses to draw a full sample.
    if len(foreground_pose) < num_sample:
        continue
    # NOTE(review): random.sample needs a sequence, while get_trajectory indexes these pose
    # containers by frame id — confirm what group_pose_from_interval returns.
    pose_list_sample = random.sample(foreground_pose, num_sample)
    hist_foreground_test = collect_histogram(pose_list_sample)

    # Note: this rebinds `predicted_label`, which filter_by_histogram also reads.
    predicted_label = model.predict(hist_foreground_test)
    # Fraction of sampled poses given label 1, computed once and reused below.
    foreground_ratio = 1. * np.sum(predicted_label) / len(predicted_label)
    if foreground_ratio > 0.5:
        match_scene_intervals_foreground.append(interval)
    else:
        match_scene_intervals_background.append(interval)
    print("Foreground", foreground_ratio)
        

In [None]:
# Save both interval lists under one dict. The list of intervals classified as "background"
# is stored under 'JZ_foreground'.
match_scene_intervals_dict = {'HW_foreground': match_scene_intervals_foreground,
                              'JZ_foreground': match_scene_intervals_background}
# The `with` block closes the file as soon as the dump finishes.
with open('/app/result/match_scene_intervals_dict.pkl', 'wb') as f:
    pickle.dump(match_scene_intervals_dict, f)

In [None]:
# Write a still montage of the intervals in the foreground list.
# NOTE(review): other cells call create_montage_from_intervals for interval lists — confirm
# that create_montage exists and accepts the same input.
create_montage(match_scene_intervals_foreground, out_path='/app/result/foreground_montage.jpg',
                  width=2160, num_cols=6)

In [None]:
# Write a still montage of the intervals in the background list.
# NOTE(review): other cells call create_montage_from_intervals for interval lists — confirm
# that create_montage exists and accepts the same input.
create_montage(match_scene_intervals_background, out_path='/app/result/background_montage.jpg',
                  width=2160, num_cols=6)