In [1]:
import os
import re
import numpy as np
import math as mt
import pandas as pd
import tqdm.notebook as tn
from random import random

from scipy.stats import linregress

In [2]:
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
sns.set_theme()

In [3]:
data_folder = 'Data'
file_name = '2019.04.08 PUC19_2019.04.01 PUC19_mica.dat'
file_path = os.path.join(data_folder, file_name)

In [4]:
class LinearMath:
    @staticmethod
    def make_vector(p1, p2):
        """вектор из точек"""
        return p2[0] - p1[0], p2[1] - p1[1]

    @staticmethod
    def get_len(v):
        """длина вектора"""
        return mt.sqrt(v[0] ** 2 + v[1] ** 2)
    
    @staticmethod
    def get_angle(v1, v2, precision=0.001, return_cos=False):
        """угол между двумя векторами"""
    #     sign = v1[0] * v2[1] - v1[1] * v2[0]
    #     try:
    #         sign = sign / abs(sign)
    #     except:
    #         return 0
        if return_cos == True:
            cos  = ((v1[0] * v2[0] + v1[1] * v2[1]) /
                (LinearMath.get_len(v1) * LinearMath.get_len(v2)))
            return cos
        else:
            try:
                cos  = ((v1[0] * v2[0] + v1[1] * v2[1]) /
                    (LinearMath.get_len(v1) * LinearMath.get_len(v2)))
                return mt.acos(cos)

            except Exception as error: 
                if abs(cos - 1) < precision:
                    cos = 1 
                    return mt.acos(cos)
                elif abs(cos + 1) < precision:
                    cos = -1 
                    return mt.acos(cos)
                else:
                    print('cos: ', cos, '\n', error)
                    return 0
    
def avg_angle(vector_list, max_n=None, return_cos=False):
    """
    вычисление среднего угла между векторами массива vector_list
    возвращает DataFrame:
        возвращает два np массива: расстояние между точками - угол между точками
    """
    if max_n == None:
#         max_n = int(len(vector_list)/2)
        max_n = len(vector_list)
    max_point = len(vector_list)
    
    df = pd.DataFrame()
    angle = []
    distance = []
    for n in (range(max_n)):
        for i in range(max_point-n):
            angle.append(LinearMath.get_angle(vector_list[n], vector_list[n+i], return_cos=return_cos))
            if i == 0:
                distance.append(0)
            else:
                distance.append(distance[-1] + LinearMath.get_len(vector_list[i]))
#         avg_angle    = (np.array(angle)).mean()
#         avg_angle_sq = (np.array(angle)**2).mean()
#         avg_cos      = np.cos(np.array(angle)).mean()
#         df = df.append({'distance':n, 'avg_angle_sq':avg_angle_sq, 'avg_cos':avg_cos, 'avg_angle':avg_angle}, ignore_index='True')
    return np.array(distance, dtype=float, copy=False), np.array(angle, dtype=float, copy=False)

In [20]:
def parse_data(file_path):
    file = open(file_path)
    values = file.read().split("\n")
    data = []
#     data = np.empty(1)
    for chain in values:
        chain_split = re.findall(r"\(.*?\)", chain)
        one_chain = []
        if chain_split != []:
            for point in chain_split:
                one_chain.append(point[1:-1].split(','))
            data.append(one_chain)
    return data

def create_df_by_list(list_chain):
    return pd.DataFrame(list_chain, dtype=float).rename(columns={0:'x', 1:'y'})

def multichain_list_to_frame(chain_list, calc='cos'):   
    if calc == 'cos':
        return_cos = True
    elif calc == 'angle':
        return_cos = False
    else:
        print("error name 'calc'")
        return None
    df_final = pd.DataFrame()
    for n, chain_list in tn.tqdm(enumerate(multy_chain)):
        chain = np.array(chain_list, dtype=float)
        vectors = []
        for i in (range(len(chain)-1)):
            vectors.append(LinearMath.make_vector(chain[i], chain[i+1]))
        distance, angle = avg_angle(vectors, return_cos=return_cos)
        df_tmp = pd.DataFrame(data=np.dstack((distance, angle))[0], columns=['distance', 'angle'])
        df_tmp['chain_num'] = n
        df_final = df_final.append(df_tmp)
    return df_final

In [63]:
def plot_chain(chain_list, max_chains=5):
    fig = plt.figure(figsize=(18, 12))
    for i, chain in enumerate(chain_list):
        chain_df = create_df_by_list(chain)
        plt.plot(chain_df['x'], chain_df['y'])
        if i == max_chains:
            break

In [64]:
multy_chain = parse_data(file_path)
plot_chain(multy_chain, max_chains=10)

AttributeError: 'Figure' object has no attribute 'plot'

<Figure size 1296x864 with 0 Axes>

In [54]:
df = multichain_list_to_frame(multy_chain, calc='angle')

0it [00:00, ?it/s]

In [55]:
df.head()

Unnamed: 0,distance,angle,chain_num
0,0.0,1.490116e-08,0
1,5.459584,0.2782409,0
2,10.09232,0.4202199,0
3,13.545123,0.8840723,0
4,17.71682,0.3831437,0


In [56]:
df_work = df.copy()
df_work['distance'] = df_work['distance'].round(-1)
# group_by = df_work.groupby('distance')['angle'].mean().reset_index()
group = df_work.groupby('distance').agg({'angle':['mean', 'count']}).reset_index()
group.columns = ['distance', 'ang_mean', 'count']
group[:5]

Unnamed: 0,distance,ang_mean,count
0,0.0,0.182492,22128
1,10.0,0.51584,29921
2,20.0,0.631541,28468
3,30.0,0.737752,26936
4,40.0,0.835325,25319


In [57]:
x=group['distance']
y=group['ang_mean']

fig = go.Figure()
fig.add_trace(go.Scatter(x=group['distance'], y=group['ang_mean'], mode='lines+markers',  name='cos'
                         , marker=dict(color=group['count']/group['count'].sum()
                                         , colorbar=dict(title="count")
                                         , colorscale='Inferno')
                        ))

fig.update_layout(legend_orientation="h",
                  legend=dict(x=.5, xanchor="center"),
                  margin=dict(l=0, r=0, t=0, b=0))
fig.update_traces(hoverinfo="all", hovertemplate="Расстояние: %{x}<br>Значение: %{y}")
fig.show()

In [60]:
max_n = 200
x = df[df['distance'] < max_n]['distance']
y = df[df['distance'] < max_n]['angle']
lineregress_values = linregress(x, y)

slope_θ2 = lineregress_values.slope
stderr_θ2 = lineregress_values.stderr
intercept_θ2 = lineregress_values.intercept

print(f'slope: {slope_θ2} +- {stderr_θ2}')
print(f'p: {1/slope_θ2} +- {1/slope_θ2**2 * stderr_θ2}')

slope: 0.00631256199765287 +- 2.035767874853926e-05
p: 158.41428573245204 +- 0.5108776974102847


In [61]:
fig = go.Figure()
data = group[group['distance']<max_n]
fig.add_trace(go.Scatter(
    x=data['distance'],
    y=data['ang_mean'],
    name="data"
))

fig.add_trace(go.Scatter(
    x=[1, max_n],
    y=[1*slope_θ2 + intercept_θ2, max_n*slope_θ2 + intercept_θ2],
    mode="lines",
    line=dict(color='red', width=2,
                              dash='dash'),
    name="lineregression"
    ))

fig.update_xaxes(range=[0, max_n])
fig.update_yaxes(range=[0, data['ang_mean'].max()])

fig.update_layout(title="Средний квадрат угла от длины сегмента",
                xaxis_title="l",
                yaxis_title="θ<sup>2</sup>",
                width=600,
                height=600,
                legend=dict(
                yanchor="top",
                y=0.99,
                xanchor="left",
                x=0.01
))

fig.add_annotation(x=100, y=0.2,
            text=f'p = {1/slope_θ2:.2f} ± {1/slope_θ2**2 * stderr_θ2:.2f}',
            showarrow=False,
            bordercolor="#c7c7c7",
            bgcolor="white",
            font=dict(
            size=20,
            )
                  )

fig.show()